From f98df5ed0a670f2c4c1a50d7901acbb862a247c7 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 30 Jan 2020 17:31:52 +1100 Subject: powerpc/pseries/vio: Remove stray #ifdef CONFIG_PPC_PSERIES vio.c is in platforms/pseries, which is only built if PPC_PSERIES=y. In other words, this ifdef is pointless. Signed-off-by: Oliver O'Halloran Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200130063153.19915-1-oohall@gmail.com --- arch/powerpc/platforms/pseries/vio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index f682b7babc09..37f1f25ba804 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1628,7 +1628,6 @@ const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length) } EXPORT_SYMBOL(vio_get_attribute); -#ifdef CONFIG_PPC_PSERIES /* vio_find_name() - internal because only vio.c knows how we formatted the * kobject name */ @@ -1698,7 +1697,6 @@ int vio_disable_interrupts(struct vio_dev *dev) return rc; } EXPORT_SYMBOL(vio_disable_interrupts); -#endif /* CONFIG_PPC_PSERIES */ static int __init vio_init(void) { -- cgit v1.2.3-59-g8ed1b From 8cbb00a901037986f4066e2fc0d36a4882475662 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 30 Jan 2020 17:31:53 +1100 Subject: powerpc/pseries/Makefile: Remove CONFIG_PPC_PSERIES check The pseries Makefile (arch/powerpc/platforms/pseries/Makefile) is only included by the platform Makefile (arch/powerpc/platform/Makefile) when CONFIG_PPC_PSERIES is selected, so checking for CONFIG_PPC_PSERIES in the pseries Makefile is pointless. 
Signed-off-by: Oliver O'Halloran Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200130063153.19915-2-oohall@gmail.com --- arch/powerpc/platforms/pseries/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index a3c74a5cf20d..c8a2b0b05ac0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -29,6 +29,4 @@ obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o -ifdef CONFIG_PPC_PSERIES obj-$(CONFIG_SUSPEND) += suspend.o -endif -- cgit v1.2.3-59-g8ed1b From 72c4ebbac476b8375e69fd09390e6b64c2891716 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 30 Jan 2020 09:32:06 +0530 Subject: powerpc/papr_scm: Mark papr_scm_ndctl() as static Function papr_scm_ndctl() is neither exported from the module nor called directly from outside 'papr_scm.c' hence should be marked 'static'. 
Signed-off-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200130040206.79998-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 0b4467e378e5..e4606100e286 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -246,8 +246,9 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } -int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, - unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) +static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) { struct nd_cmd_get_config_size *get_size_hdr; struct papr_scm_priv *p; -- cgit v1.2.3-59-g8ed1b From ba32f4b02105e57627912b42e141d65d90074c64 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 29 Jan 2020 19:50:07 +0000 Subject: powerpc/process: Remove unnecessary #ifdef CONFIG_PPC64 in copy_thread_tls() is_32bit_task() exists on both PPC64 and PPC32, so there is no need for the #ifdef. 
Signed-off-by: Christophe Leroy Reviewed-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6ecbda05b4119c40222dc8ec284604e1597c9bff.1580327381.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/process.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index fad50db9dcf2..e730b8e522b0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1634,11 +1634,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, p->thread.regs = childregs; childregs->gpr[3] = 0; /* Result from fork() */ if (clone_flags & CLONE_SETTLS) { -#ifdef CONFIG_PPC64 if (!is_32bit_task()) childregs->gpr[13] = tls; else -#endif childregs->gpr[2] = tls; } -- cgit v1.2.3-59-g8ed1b From f52153ab383f04a45c38d8a7f55a4249477b20df Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:05 +0530 Subject: Documentation/ABI: Add ABI documentation for /sys/kernel/fadump_* Add missing ABI documentation for existing FADump sysfs files. 
Signed-off-by: Sourabh Jain Reviewed-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-2-sourabhjain@linux.ibm.com --- Documentation/ABI/testing/sysfs-kernel-fadump_enabled | 7 +++++++ Documentation/ABI/testing/sysfs-kernel-fadump_registered | 8 ++++++++ Documentation/ABI/testing/sysfs-kernel-fadump_release_mem | 8 ++++++++ Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore | 7 +++++++ 4 files changed, 30 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_enabled create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_registered create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_mem create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_enabled b/Documentation/ABI/testing/sysfs-kernel-fadump_enabled new file mode 100644 index 000000000000..f73632b1c006 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_enabled @@ -0,0 +1,7 @@ +What: /sys/kernel/fadump_enabled +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. +User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_registered b/Documentation/ABI/testing/sysfs-kernel-fadump_registered new file mode 100644 index 000000000000..dcf925e53f0f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_registered @@ -0,0 +1,8 @@ +What: /sys/kernel/fadump_registered +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. + Setting 1 to this file enables the system to collect the + dump and 0 to disable it. 
+User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem b/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem new file mode 100644 index 000000000000..9c20d64ab48d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem @@ -0,0 +1,8 @@ +What: /sys/kernel/fadump_release_mem +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore new file mode 100644 index 000000000000..53313c1d4e7a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore @@ -0,0 +1,7 @@ +What: /sys/kernel/fadump_release_opalcore +Date: Sep 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + The sysfs file is available when the system is booted to + collect the dump on OPAL based machine. It used to release + the memory used to collect the opalcore. -- cgit v1.2.3-59-g8ed1b From 9255782f70614c89b1a15ec6997c4b72ce9e630a Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:06 +0530 Subject: sysfs: Wrap __compat_only_sysfs_link_entry_to_kobj function to change the symlink name The __compat_only_sysfs_link_entry_to_kobj function creates a symlink to a kobject but doesn't provide an option to change the symlink file name. This patch adds a wrapper function compat_only_sysfs_link_entry_to_kobj that extends the __compat_only_sysfs_link_entry_to_kobj functionality which allows function caller to customize the symlink name. 
Signed-off-by: Sourabh Jain [mpe: Fix compile error when CONFIG_SYSFS=n] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-3-sourabhjain@linux.ibm.com --- fs/sysfs/group.c | 28 +++++++++++++++++++++++++--- include/linux/sysfs.h | 12 ++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index c4ab045926b7..1e2a096057bc 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -424,6 +424,25 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name) +{ + return compat_only_sysfs_link_entry_to_kobj(kobj, target_kobj, + target_name, NULL); +} +EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); + +/** + * compat_only_sysfs_link_entry_to_kobj - add a symlink to a kobject pointing + * to a group or an attribute + * @kobj: The kobject containing the group. + * @target_kobj: The target kobject. + * @target_name: The name of the target group or attribute. + * @symlink_name: The name of the symlink file (target_name will be + * considered if symlink_name is NULL). 
+ */ +int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name) { struct kernfs_node *target; struct kernfs_node *entry; @@ -448,12 +467,15 @@ int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, return -ENOENT; } - link = kernfs_create_link(kobj->sd, target_name, entry); + if (!symlink_name) + symlink_name = target_name; + + link = kernfs_create_link(kobj->sd, symlink_name, entry); if (PTR_ERR(link) == -EEXIST) - sysfs_warn_dup(kobj->sd, target_name); + sysfs_warn_dup(kobj->sd, symlink_name); kernfs_put(entry); kernfs_put(target); return PTR_ERR_OR_ZERO(link); } -EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); +EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fa7ee503fb76..7462315a643b 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -300,6 +300,10 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name); +int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); @@ -508,6 +512,14 @@ static inline int __compat_only_sysfs_link_entry_to_kobj( return 0; } +static inline int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name) +{ + return 0; +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { -- cgit v1.2.3-59-g8ed1b From d418b19f34ed0c751a69810080596f7e749595aa Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:07 +0530 Subject: powerpc/fadump: Reorganize /sys/kernel/fadump_* sysfs files As the number of 
FADump sysfs files increases it is hard to manage all of them inside /sys/kernel directory. It's better to have all the FADump related sysfs files in a dedicated directory /sys/kernel/fadump. But in order to maintain backward compatibility a symlink has been added for every sysfs file that has moved to the new location. As the FADump sysfs files are now part of a dedicated directory there is no need to prefix their name with fadump_, hence sysfs file names are also updated. For example fadump_enabled sysfs file is now referred to as enabled. Also consolidate ABI documentation for all the FADump sysfs files in a single file Documentation/ABI/testing/sysfs-kernel-fadump. Signed-off-by: Sourabh Jain Tested-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-4-sourabhjain@linux.ibm.com --- Documentation/ABI/testing/sysfs-kernel-fadump | 33 +++++++ arch/powerpc/kernel/fadump.c | 119 ++++++++++++++++++-------- 2 files changed, 118 insertions(+), 34 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump new file mode 100644 index 000000000000..5d988b919e81 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump @@ -0,0 +1,33 @@ +What: /sys/kernel/fadump/* +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: + The /sys/kernel/fadump/* is a collection of FADump sysfs + files that provide information about the configuration status + of Firmware Assisted Dump (FADump). + +What: /sys/kernel/fadump/enabled +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. +User: Kdump service + +What: /sys/kernel/fadump/registered +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. 
+ Setting 1 to this file enables the system to collect the + dump and 0 to disable it. +User: Kdump service + +What: /sys/kernel/fadump/release_mem +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ff0114aeba9b..1182ae44ef14 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -36,6 +36,8 @@ static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); +struct kobject *fadump_kobj; + #ifndef CONFIG_PRESERVE_FA_DUMP static DEFINE_MUTEX(fadump_mutex); struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 }; @@ -1323,9 +1325,9 @@ static void fadump_invalidate_release_mem(void) fw_dump.ops->fadump_init_mem_struct(&fw_dump); } -static ssize_t fadump_release_memory_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) +static ssize_t release_mem_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { int input = -1; @@ -1350,23 +1352,33 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj, return count; } -static ssize_t fadump_enabled_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +/* Release the reserved memory and disable the FADump */ +static void unregister_fadump(void) +{ + fadump_cleanup(); + fadump_release_memory(fw_dump.reserve_dump_area_start, + fw_dump.reserve_dump_area_size); + fw_dump.fadump_enabled = 0; + kobject_put(fadump_kobj); +} + +static ssize_t enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } -static ssize_t fadump_register_show(struct kobject *kobj, - struct kobj_attribute *attr, - char 
*buf) +static ssize_t registered_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", fw_dump.dump_registered); } -static ssize_t fadump_register_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) +static ssize_t registered_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { int ret = 0; int input = -1; @@ -1418,15 +1430,17 @@ static int fadump_region_show(struct seq_file *m, void *private) return 0; } -static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem, - 0200, NULL, - fadump_release_memory_store); -static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, - 0444, fadump_enabled_show, - NULL); -static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered, - 0644, fadump_register_show, - fadump_register_store); +static struct kobj_attribute release_attr = __ATTR_WO(release_mem); +static struct kobj_attribute enable_attr = __ATTR_RO(enabled); +static struct kobj_attribute register_attr = __ATTR_RW(registered); + +static struct attribute *fadump_attrs[] = { + &enable_attr.attr, + &register_attr.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(fadump); DEFINE_SHOW_ATTRIBUTE(fadump_region); @@ -1435,16 +1449,11 @@ static void fadump_init_files(void) struct dentry *debugfs_file; int rc = 0; - rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr); - if (rc) - printk(KERN_ERR "fadump: unable to create sysfs file" - " fadump_enabled (%d)\n", rc); - - rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr); - if (rc) - printk(KERN_ERR "fadump: unable to create sysfs file" - " fadump_registered (%d)\n", rc); - + fadump_kobj = kobject_create_and_add("fadump", kernel_kobj); + if (!fadump_kobj) { + pr_err("failed to create fadump kobject\n"); + return; + } debugfs_file = debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL, &fadump_region_fops); @@ -1453,10 +1462,52 @@ static void 
fadump_init_files(void) " fadump_region\n"); if (fw_dump.dump_active) { - rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr); + rc = sysfs_create_file(fadump_kobj, &release_attr.attr); + if (rc) + pr_err("unable to create release_mem sysfs file (%d)\n", + rc); + } + + rc = sysfs_create_groups(fadump_kobj, fadump_groups); + if (rc) { + pr_err("sysfs group creation failed (%d), unregistering FADump", + rc); + unregister_fadump(); + return; + } + + /* + * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to + * create symlink at old location to maintain backward compatibility. + * + * - fadump_enabled -> fadump/enabled + * - fadump_registered -> fadump/registered + * - fadump_release_mem -> fadump/release_mem + */ + rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj, + "enabled", "fadump_enabled"); + if (rc) { + pr_err("unable to create fadump_enabled symlink (%d)", rc); + return; + } + + rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj, + "registered", + "fadump_registered"); + if (rc) { + pr_err("unable to create fadump_registered symlink (%d)", rc); + sysfs_remove_link(kernel_kobj, "fadump_enabled"); + return; + } + + if (fw_dump.dump_active) { + rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, + fadump_kobj, + "release_mem", + "fadump_release_mem"); if (rc) - printk(KERN_ERR "fadump: unable to create sysfs file" - " fadump_release_mem (%d)\n", rc); + pr_err("unable to create fadump_release_mem symlink (%d)", + rc); } return; } -- cgit v1.2.3-59-g8ed1b From 8852c07a881b0acfd3d75cf3927adaab815c4ee5 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:08 +0530 Subject: powerpc/powernv: Move core and fadump_release_opalcore under new kobject The /sys/firmware/opal/core and /sys/kernel/fadump_release_opalcore sysfs files are used to export and release the OPAL memory on PowerNV platform. let's organize them into a new kobject under /sys/firmware/opal/mpipl/ directory. 
A symlink is added to maintain the backward compatibility for /sys/firmware/opal/core sysfs file. Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-5-sourabhjain@linux.ibm.com --- .../removed/sysfs-kernel-fadump_release_opalcore | 9 ++++ .../testing/sysfs-kernel-fadump_release_opalcore | 7 --- Documentation/powerpc/firmware-assisted-dump.rst | 15 +++--- arch/powerpc/platforms/powernv/opal-core.c | 55 ++++++++++++++++------ 4 files changed, 58 insertions(+), 28 deletions(-) create mode 100644 Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore diff --git a/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore new file mode 100644 index 000000000000..a8d46cd0f4e6 --- /dev/null +++ b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore @@ -0,0 +1,9 @@ +This ABI is moved to /sys/firmware/opal/mpipl/release_core. + +What: /sys/kernel/fadump_release_opalcore +Date: Sep 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + The sysfs file is available when the system is booted to + collect the dump on OPAL based machine. It used to release + the memory used to collect the opalcore. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore deleted file mode 100644 index 53313c1d4e7a..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore +++ /dev/null @@ -1,7 +0,0 @@ -What: /sys/kernel/fadump_release_opalcore -Date: Sep 2019 -Contact: linuxppc-dev@lists.ozlabs.org -Description: write only - The sysfs file is available when the system is booted to - collect the dump on OPAL based machine. It used to release - the memory used to collect the opalcore. 
diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index 0455a78486d5..345a3405206e 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -112,13 +112,13 @@ to ensure that crash data is preserved to process later. -- On OPAL based machines (PowerNV), if the kernel is build with CONFIG_OPAL_CORE=y, OPAL memory at the time of crash is also - exported as /sys/firmware/opal/core file. This procfs file is + exported as /sys/firmware/opal/mpipl/core file. This procfs file is helpful in debugging OPAL crashes with GDB. The kernel memory used for exporting this procfs file can be released by echo'ing - '1' to /sys/kernel/fadump_release_opalcore node. + '1' to /sys/firmware/opal/mpipl/release_core node. e.g. - # echo 1 > /sys/kernel/fadump_release_opalcore + # echo 1 > /sys/firmware/opal/mpipl/release_core Implementation details: ----------------------- @@ -283,14 +283,17 @@ Here is the list of files under kernel sysfs: enhanced to use this interface to release the memory reserved for dump and continue without 2nd reboot. - /sys/kernel/fadump_release_opalcore +Note: /sys/kernel/fadump_release_opalcore sysfs has moved to + /sys/firmware/opal/mpipl/release_core + + /sys/firmware/opal/mpipl/release_core This file is available only on OPAL based machines when FADump is active during capture kernel. This is used to release the memory - used by the kernel to export /sys/firmware/opal/core file. To + used by the kernel to export /sys/firmware/opal/mpipl/core file. To release this memory, echo '1' to it: - echo 1 > /sys/kernel/fadump_release_opalcore + echo 1 > /sys/firmware/opal/mpipl/release_core Here is the list of files under powerpc debugfs: (Assuming debugfs is mounted on /sys/kernel/debug directory.) 
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index ed895d82c048..6dba3b62269f 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -71,6 +71,7 @@ static LIST_HEAD(opalcore_list); static struct opalcore_config *oc_conf; static const struct opal_mpipl_fadump *opalc_metadata; static const struct opal_mpipl_fadump *opalc_cpu_metadata; +struct kobject *mpipl_kobj; /* * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered @@ -428,7 +429,7 @@ static void opalcore_cleanup(void) return; /* Remove OPAL core sysfs file */ - sysfs_remove_bin_file(opal_kobj, &opal_core_attr); + sysfs_remove_bin_file(mpipl_kobj, &opal_core_attr); oc_conf->ptload_phdr = NULL; oc_conf->ptload_cnt = 0; @@ -563,9 +564,9 @@ error_out: of_node_put(np); } -static ssize_t fadump_release_opalcore_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) +static ssize_t release_core_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { int input = -1; @@ -589,9 +590,23 @@ static ssize_t fadump_release_opalcore_store(struct kobject *kobj, return count; } -static struct kobj_attribute opalcore_rel_attr = __ATTR(fadump_release_opalcore, - 0200, NULL, - fadump_release_opalcore_store); +static struct kobj_attribute opalcore_rel_attr = __ATTR_WO(release_core); + +static struct attribute *mpipl_attr[] = { + &opalcore_rel_attr.attr, + NULL, +}; + +static struct bin_attribute *mpipl_bin_attr[] = { + &opal_core_attr, + NULL, + +}; + +static struct attribute_group mpipl_group = { + .attrs = mpipl_attr, + .bin_attrs = mpipl_bin_attr, +}; static int __init opalcore_init(void) { @@ -609,7 +624,7 @@ static int __init opalcore_init(void) * then capture the dump. 
*/ if (!(is_opalcore_usable())) { - pr_err("Failed to export /sys/firmware/opal/core\n"); + pr_err("Failed to export /sys/firmware/opal/mpipl/core\n"); opalcore_cleanup(); return rc; } @@ -617,18 +632,28 @@ static int __init opalcore_init(void) /* Set OPAL core file size */ opal_core_attr.size = oc_conf->opalcore_size; + mpipl_kobj = kobject_create_and_add("mpipl", opal_kobj); + if (!mpipl_kobj) { + pr_err("unable to create mpipl kobject\n"); + return -ENOMEM; + } + /* Export OPAL core sysfs file */ - rc = sysfs_create_bin_file(opal_kobj, &opal_core_attr); - if (rc != 0) { - pr_err("Failed to export /sys/firmware/opal/core\n"); + rc = sysfs_create_group(mpipl_kobj, &mpipl_group); + if (rc) { + pr_err("mpipl sysfs group creation failed (%d)", rc); opalcore_cleanup(); return rc; } - - rc = sysfs_create_file(kernel_kobj, &opalcore_rel_attr.attr); + /* The /sys/firmware/opal/core is moved to /sys/firmware/opal/mpipl/ + * directory, need to create symlink at old location to maintain + * backward compatibility. + */ + rc = compat_only_sysfs_link_entry_to_kobj(opal_kobj, mpipl_kobj, + "core", NULL); if (rc) { - pr_warn("unable to create sysfs file fadump_release_opalcore (%d)\n", - rc); + pr_err("unable to create core symlink (%d)\n", rc); + return rc; } return 0; -- cgit v1.2.3-59-g8ed1b From 3f5f1f22ef10ee0278cef1243944c93aca01b236 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:09 +0530 Subject: Documentation/ABI: Mark /sys/kernel/fadump_* sysfs files deprecated Add a deprecation note in FADump sysfs ABI documentation files and move them from ABI/testing to ABI/obsolete directory. 
Signed-off-by: Sourabh Jain [mpe: Use a proper table to fix errors from the documentation build] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-6-sourabhjain@linux.ibm.com --- Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled | 9 +++++++++ Documentation/ABI/obsolete/sysfs-kernel-fadump_registered | 10 ++++++++++ Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem | 10 ++++++++++ Documentation/ABI/testing/sysfs-kernel-fadump_enabled | 7 ------- Documentation/ABI/testing/sysfs-kernel-fadump_registered | 8 -------- Documentation/ABI/testing/sysfs-kernel-fadump_release_mem | 8 -------- Documentation/powerpc/firmware-assisted-dump.rst | 12 ++++++++++++ 7 files changed, 41 insertions(+), 23 deletions(-) create mode 100644 Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled create mode 100644 Documentation/ABI/obsolete/sysfs-kernel-fadump_registered create mode 100644 Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_enabled delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_registered delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_mem diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled new file mode 100644 index 000000000000..e9c2de8b3688 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled @@ -0,0 +1,9 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/enabled. + +What: /sys/kernel/fadump_enabled +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. 
+User: Kdump service diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered new file mode 100644 index 000000000000..0360be39c98e --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered @@ -0,0 +1,10 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/registered. + +What: /sys/kernel/fadump_registered +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. + Setting 1 to this file enables the system to collect the + dump and 0 to disable it. +User: Kdump service diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem new file mode 100644 index 000000000000..6ce0b129ab12 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem @@ -0,0 +1,10 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem. + +What: /sys/kernel/fadump_release_mem +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_enabled b/Documentation/ABI/testing/sysfs-kernel-fadump_enabled deleted file mode 100644 index f73632b1c006..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_enabled +++ /dev/null @@ -1,7 +0,0 @@ -What: /sys/kernel/fadump_enabled -Date: Feb 2012 -Contact: linuxppc-dev@lists.ozlabs.org -Description: read only - Primarily used to identify whether the FADump is enabled in - the kernel or not. 
-User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_registered b/Documentation/ABI/testing/sysfs-kernel-fadump_registered deleted file mode 100644 index dcf925e53f0f..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_registered +++ /dev/null @@ -1,8 +0,0 @@ -What: /sys/kernel/fadump_registered -Date: Feb 2012 -Contact: linuxppc-dev@lists.ozlabs.org -Description: read/write - Helps to control the dump collect feature from userspace. - Setting 1 to this file enables the system to collect the - dump and 0 to disable it. -User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem b/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem deleted file mode 100644 index 9c20d64ab48d..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem +++ /dev/null @@ -1,8 +0,0 @@ -What: /sys/kernel/fadump_release_mem -Date: Feb 2012 -Contact: linuxppc-dev@lists.ozlabs.org -Description: write only - This is a special sysfs file and only available when - the system is booted to capture the vmcore using FADump. - It is used to release the memory reserved by FADump to - save the crash dump. diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index 345a3405206e..2cd65a0df9b8 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -295,6 +295,18 @@ Note: /sys/kernel/fadump_release_opalcore sysfs has moved to echo 1 > /sys/firmware/opal/mpipl/release_core +Note: The following FADump sysfs files are deprecated. 
+ ++----------------------------------+--------------------------------+ +| Deprecated | Alternative | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_enabled | /sys/kernel/fadump/enabled | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_registered | /sys/kernel/fadump/registered | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_release_mem | /sys/kernel/fadump/release_mem | ++----------------------------------+--------------------------------+ + Here is the list of files under powerpc debugfs: (Assuming debugfs is mounted on /sys/kernel/debug directory.) -- cgit v1.2.3-59-g8ed1b From d8e73458f33a24810413ee3a0cd020b644de2f98 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:10 +0530 Subject: powerpc/fadump: sysfs for fadump memory reservation Add a sys interface to allow querying the memory reserved by FADump for saving the crash dump. Also added Documentation/ABI for the new sysfs file. Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-7-sourabhjain@linux.ibm.com --- Documentation/ABI/testing/sysfs-kernel-fadump | 7 +++++++ Documentation/powerpc/firmware-assisted-dump.rst | 5 +++++ arch/powerpc/kernel/fadump.c | 9 +++++++++ 3 files changed, 21 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump index 5d988b919e81..8f7a64a81783 100644 --- a/Documentation/ABI/testing/sysfs-kernel-fadump +++ b/Documentation/ABI/testing/sysfs-kernel-fadump @@ -31,3 +31,10 @@ Description: write only the system is booted to capture the vmcore using FADump. It is used to release the memory reserved by FADump to save the crash dump. 
+ +What: /sys/kernel/fadump/mem_reserved +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Provide information about the amount of memory reserved by + FADump to save the crash dump in bytes. diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index 2cd65a0df9b8..b3f3ee135dbe 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -268,6 +268,11 @@ Here is the list of files under kernel sysfs: be handled and vmcore will not be captured. This interface can be easily integrated with kdump service start/stop. + /sys/kernel/fadump/mem_reserved + + This is used to display the memory reserved by FADump for saving the + crash dump. + /sys/kernel/fadump_release_mem This file is available only when FADump is active during second kernel. This is used to release the reserved memory diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 1182ae44ef14..265b4aa72252 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1369,6 +1369,13 @@ static ssize_t enabled_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } +static ssize_t mem_reserved_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size); +} + static ssize_t registered_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1433,10 +1440,12 @@ static int fadump_region_show(struct seq_file *m, void *private) static struct kobj_attribute release_attr = __ATTR_WO(release_mem); static struct kobj_attribute enable_attr = __ATTR_RO(enabled); static struct kobj_attribute register_attr = __ATTR_RW(registered); +static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); static struct attribute *fadump_attrs[] = { &enable_attr.attr, &register_attr.attr, + &mem_reserved_attr.attr, NULL, }; --
cgit v1.2.3-59-g8ed1b From 030e347430957f6f7f29db9099368f8b86c0bf76 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 1 Feb 2020 08:04:31 +0000 Subject: powerpc/32s: Don't flush all TLBs when flushing one page When flushing any memory range, the flushing function flushes all TLBs. When (start) and (end - 1) are in the same memory page, flush that page instead. Signed-off-by: Christophe Leroy Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b30b2eae6960502eaf0d9e36c60820b839693c33.1580542939.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/book3s32/tlb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 2fcd321040ff..724c0490fb17 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -79,11 +79,14 @@ static void flush_range(struct mm_struct *mm, unsigned long start, int count; unsigned int ctx = mm->context.id; + start &= PAGE_MASK; if (!Hash) { - _tlbia(); + if (end - start <= PAGE_SIZE) + _tlbie(start); + else + _tlbia(); return; } - start &= PAGE_MASK; if (start >= end) return; end = (end - 1) | ~PAGE_MASK; -- cgit v1.2.3-59-g8ed1b From 9e27086292aa880921a0f2b8501e5189d5efcf03 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 31 Jan 2020 11:34:54 +0000 Subject: powerpc/32: Warn and return ENOSYS on syscalls from kernel Since commit b86fb88855ea ("powerpc/32: implement fast entry for syscalls on non BOOKE") and commit 1a4b739bbb4f ("powerpc/32: implement fast entry for syscalls on BOOKE"), syscalls from kernel are unexpected and can have catastrophic consequences as it will destroy the kernel stack. Test MSR_PR on syscall entry. In case syscall is from kernel, emit a warning and return ENOSYS error. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8ee3bdbbdfdfc64ca7001e90c43b2aee6f333578.1580470482.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/entry_32.S | 27 +++++++++++++++++++++++++++ arch/powerpc/kernel/head_32.h | 16 +++++++++------- arch/powerpc/kernel/head_booke.h | 5 ++++- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0713daa651d9..ad000cbb5252 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -575,6 +575,33 @@ syscall_exit_work: bl do_syscall_trace_leave b ret_from_except_full + /* + * System call was called from kernel. We get here with SRR1 in r9. + * Mark the exception as recoverable once we have retrieved SRR0, + * trap a warning and return ENOSYS with CR[SO] set. + */ + .globl ret_from_kernel_syscall +ret_from_kernel_syscall: + mfspr r9, SPRN_SRR0 + mfspr r10, SPRN_SRR1 +#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE) + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR)) + mtmsr r11 +#endif + +0: trap + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + + li r3, ENOSYS + crset so +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) + mtspr SPRN_NRI, r0 +#endif + mtspr SPRN_SRR0, r9 + mtspr SPRN_SRR1, r10 + SYNC + RFI + /* * The fork/clone functions need to copy the full register set into * the child process. Therefore we need to save all the nonvolatile diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a6a5fbbf8504..0e7bf28fe53a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -111,14 +111,16 @@ .macro SYSCALL_ENTRY trapno mfspr r12,SPRN_SPRG_THREAD + mfspr r9, SPRN_SRR1 #ifdef CONFIG_VMAP_STACK - mfspr r9, SPRN_SRR0 - mfspr r11, SPRN_SRR1 - stw r9, SRR0(r12) - stw r11, SRR1(r12) + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r12) + stw r9, SRR1(r12) #endif mfcr r10 + andi. 
r11, r9, MSR_PR lwz r11,TASK_STACK-THREAD(r12) + beq- 99f rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE #ifdef CONFIG_VMAP_STACK @@ -128,15 +130,14 @@ #endif tovirt_vmstack r12, r12 tophys_novmstack r11, r11 - mflr r9 stw r10,_CCR(r11) /* save registers */ - stw r9, _LINK(r11) + mflr r10 + stw r10, _LINK(r11) #ifdef CONFIG_VMAP_STACK lwz r10, SRR0(r12) lwz r9, SRR1(r12) #else mfspr r10,SPRN_SRR0 - mfspr r9,SPRN_SRR1 #endif stw r1,GPR1(r11) stw r1,0(r11) @@ -209,6 +210,7 @@ mtspr SPRN_SRR0,r11 SYNC RFI /* jump to handler, enable MMU */ +99: b ret_from_kernel_syscall .endm .macro save_dar_dsisr_on_stack reg1, reg2, sp diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 37fc84ed90e3..bd2e5ed8dd50 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -104,16 +104,18 @@ FTR_SECTION_ELSE #ifdef CONFIG_KVM_BOOKE_HV ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif + mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) + andi. r11, r9, MSR_PR lwz r11, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + beq- 99f ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) stw r12, _CCR(r11) /* save various registers */ mflr r12 stw r12,_LINK(r11) mfspr r12,SPRN_SRR0 stw r1, GPR1(r11) - mfspr r9,SPRN_SRR1 stw r1, 0(r11) mr r1, r11 stw r12,_NIP(r11) @@ -176,6 +178,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mtspr SPRN_SRR0,r11 SYNC RFI /* jump to handler, enable MMU */ +99: b ret_from_kernel_syscall .endm /* To handle the additional exception priority levels on 40x and Book-E -- cgit v1.2.3-59-g8ed1b From c06f0aff035ed5a7eaff5daa8e11e7ad28ab0d54 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 31 Jan 2020 11:34:55 +0000 Subject: powerpc: Don't use thread struct for saving SRR0/1 on syscall. CR0 can be saved later, and CTR can also be used for saving. Keep SRR1 in r9 and stash SRR0 in CTR, this avoids using thread_struct in memory for that. 
Saves 3 cycles (ie 1%) in null_syscall selftest on 8xx. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b94c3bc03bac9431fec2dadb686384c481889422.1580470483.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/head_32.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 0e7bf28fe53a..4a1faeded069 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -114,28 +114,23 @@ mfspr r9, SPRN_SRR1 #ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SRR0 - stw r11, SRR0(r12) - stw r9, SRR1(r12) + mtctr r11 #endif - mfcr r10 andi. r11, r9, MSR_PR lwz r11,TASK_STACK-THREAD(r12) beq- 99f - rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE #ifdef CONFIG_VMAP_STACK - li r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r9 + li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r10 isync #endif tovirt_vmstack r12, r12 tophys_novmstack r11, r11 - stw r10,_CCR(r11) /* save registers */ mflr r10 stw r10, _LINK(r11) #ifdef CONFIG_VMAP_STACK - lwz r10, SRR0(r12) - lwz r9, SRR1(r12) + mfctr r10 #else mfspr r10,SPRN_SRR0 #endif @@ -143,6 +138,9 @@ stw r1,0(r11) tovirt_novmstack r1, r11 /* set new kernel sp */ stw r10,_NIP(r11) + mfcr r10 + rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ + stw r10,_CCR(r11) /* save registers */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #else -- cgit v1.2.3-59-g8ed1b From a83836dbc53e96f13fec248ecc201d18e1e3111d Mon Sep 17 00:00:00 2001 From: Libor Pechacek Date: Fri, 31 Jan 2020 14:28:29 +0100 Subject: powerpc/pseries: Avoid NULL pointer dereference when drmem is unavailable In guests without hotpluggable memory drmem structure is only zero initialized. Trying to manipulate DLPAR parameters results in a crash.
$ echo "memory add count 1" > /sys/kernel/dlpar Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries ... NIP: c0000000000ff294 LR: c0000000000ff248 CTR: 0000000000000000 REGS: c0000000fb9d3880 TRAP: 0300 Tainted: G E (5.5.0-rc6-2-default) MSR: 8000000000009033 CR: 28242428 XER: 20000000 CFAR: c0000000009a6c10 DAR: 0000000000000010 DSISR: 40000000 IRQMASK: 0 ... NIP dlpar_memory+0x6e4/0xd00 LR dlpar_memory+0x698/0xd00 Call Trace: dlpar_memory+0x698/0xd00 (unreliable) handle_dlpar_errorlog+0xc0/0x190 dlpar_store+0x198/0x4a0 kobj_attr_store+0x30/0x50 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 __vfs_write+0x3c/0x70 vfs_write+0xd0/0x260 ksys_write+0xdc/0x130 system_call+0x5c/0x68 Taking closer look at the code, I can see that for_each_drmem_lmb is a macro expanding into `for (lmb = &drmem_info->lmbs[0]; lmb <= &drmem_info->lmbs[drmem_info->n_lmbs - 1]; lmb++)`. When drmem_info->lmbs is NULL, the loop would iterate through the whole address range if it weren't stopped by the NULL pointer dereference on the next line. This patch aligns for_each_drmem_lmb and for_each_drmem_lmb_in_range macro behavior with the common C semantics, where the end marker does not belong to the scanned range, and alters get_lmb_range() semantics. As a side effect, the wraparound observed in the crash is prevented. 
Fixes: 6c6ea53725b3 ("powerpc/mm: Separate ibm, dynamic-memory data from DT format") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Libor Pechacek Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200131132829.10281-1-msuchanek@suse.de --- arch/powerpc/include/asm/drmem.h | 4 ++-- arch/powerpc/platforms/pseries/hotplug-memory.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 3d76e1c388c2..28c3d936fdf3 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -27,12 +27,12 @@ struct drmem_lmb_info { extern struct drmem_lmb_info *drmem_info; #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) <= (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); (lmb)++) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ &drmem_info->lmbs[0], \ - &drmem_info->lmbs[drmem_info->n_lmbs - 1]) + &drmem_info->lmbs[drmem_info->n_lmbs]) /* * The of_drconf_cell_v1 struct defines the layout of the LMB data diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index a4d40a3ceea3..fd22ec41c008 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -223,7 +223,7 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, struct drmem_lmb **end_lmb) { struct drmem_lmb *lmb, *start, *end; - struct drmem_lmb *last_lmb; + struct drmem_lmb *limit; start = NULL; for_each_drmem_lmb(lmb) { @@ -236,10 +236,10 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, if (!start) return -EINVAL; - end = &start[n_lmbs - 1]; + end = &start[n_lmbs]; - last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; - if (end > last_lmb) + limit = &drmem_info->lmbs[drmem_info->n_lmbs]; + if (end > limit) return -EINVAL; *start_lmb = start; -- cgit v1.2.3-59-g8ed1b From 
e1347a020b81fe47c80cd277bfaa61295a9482a4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 3 Feb 2020 16:47:37 +0000 Subject: powerpc/32s: Slenderize _tlbia() for powerpc 603/603e _tlbia() is a function used only on 603/603e core, ie on CPUs which don't have a hash table. _tlbia() uses the tlbia macro which implements a loop of 1024 tlbie. On the 603/603e core, flushing the entire TLB requires no more than 32 tlbie. Replace tlbia by a loop of 32 tlbie. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/12f4f4f0ff89aeab3b937fc96c84fb35e1b2517e.1580748445.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/book3s32/hash_low.S | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index c11b0a005196..a5039ad10429 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -696,18 +696,21 @@ _GLOBAL(_tlbia) bne- 10b stwcx. r8,0,r9 bne- 10b +#endif /* CONFIG_SMP */ + li r5, 32 + lis r4, KERNELBASE@h + mtctr r5 sync - tlbia +0: tlbie r4 + addi r4, r4, 0x1000 + bdnz 0b sync +#ifdef CONFIG_SMP TLBSYNC li r0,0 stw r0,0(r9) /* clear mmu_hash_lock */ mtmsr r10 SYNC_601 isync -#else /* CONFIG_SMP */ - sync - tlbia - sync #endif /* CONFIG_SMP */ blr -- cgit v1.2.3-59-g8ed1b From 05642cf7289c5562e5939d2ee8a0529d310010b8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 7 Jan 2020 09:16:40 +0000 Subject: powerpc/32: don't restore r0, r6-r8 on exception entry path after trace_hardirqs_off() Since commit b86fb88855ea ("powerpc/32: implement fast entry for syscalls on non BOOKE") and commit 1a4b739bbb4f ("powerpc/32: implement fast entry for syscalls on BOOKE"), syscalls don't use the exception entry path anymore. It is therefore pointless to restore r0 and r6-r8 after calling trace_hardirqs_off(). In the meantime, drop the '2:' label which is unused and misleading. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d2c6dc65d27e83964eb05f16a126161ab6455eea.1578388585.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/entry_32.S | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ad000cbb5252..afab378c3d28 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -246,9 +246,8 @@ reenable_mmu: * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, * r4 & r5 can contain page fault arguments that need to be passed - * along as well. r12, CCR, CTR, XER etc... are left clobbered as - * they aren't useful past this point (aren't syscall arguments), - * the rest is restored from the exception frame. + * along as well. r0, r6-r8, r12, CCR, CTR, XER etc... are left + * clobbered as they aren't useful past this point. */ stwu r1,-32(r1) @@ -262,16 +261,12 @@ reenable_mmu: * lockdep */ 1: bl trace_hardirqs_off -2: lwz r5,24(r1) + lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) lwz r9,8(r1) addi r1,r1,32 - lwz r0,GPR0(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) mtctr r11 mtlr r9 bctr /* jump to handler */ -- cgit v1.2.3-59-g8ed1b From 0b1c524caaae2428b20e714297243e5551251eb5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Jan 2020 08:25:25 +0000 Subject: powerpc/32: refactor pmd_offset(pud_offset(pgd_offset... At several places pmd pointer is retrieved through the same action: pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr); or pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); Refactor this by implementing two helpers pmd_ptr() and pmd_ptr_k() This will help when adding the p4d level. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7b065c5be35726af4066cab238ee35cabceda1fa.1578558199.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/pgtable.h | 12 ++++++++++++ arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/book3s32/tlb.c | 4 ++-- arch/powerpc/mm/kasan/kasan_init_32.c | 8 ++++---- arch/powerpc/mm/mem.c | 3 +-- arch/powerpc/mm/nohash/40x.c | 4 ++-- arch/powerpc/mm/pgtable_32.c | 2 +- 7 files changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 8cc543ed114c..22bf7bb666a7 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,6 +41,18 @@ struct mm_struct; #ifndef __ASSEMBLY__ +#ifdef CONFIG_PPC32 +static inline pmd_t *pmd_ptr(struct mm_struct *mm, unsigned long va) +{ + return pmd_offset(pud_offset(pgd_offset(mm, va), va), va); +} + +static inline pmd_t *pmd_ptr_k(unsigned long va) +{ + return pmd_offset(pud_offset(pgd_offset_k(va), va), va); +} +#endif + #include /* Keep these as a macros to avoid include dependency mess */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 0a1c65a2c565..c225b0397ffd 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -312,7 +312,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) if (!Hash) return; - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); + pmd = pmd_ptr(mm, ea); if (!pmd_none(*pmd)) add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 724c0490fb17..dc9039a170aa 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -90,7 +90,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, if (start >= end) return; end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); + pmd = 
pmd_ptr(mm, start); for (;;) { pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; if (pmd_end > end) @@ -148,7 +148,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) return; } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); + pmd = pmd_ptr(mm, vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 16dd95bd0749..b195d085970a 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -36,7 +36,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned unsigned long k_cur, k_next; pte_t *new = NULL; - pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + pmd = pmd_ptr_k(k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { k_next = pgd_addr_end(k_cur, k_end); @@ -78,7 +78,7 @@ static int __init kasan_init_region(void *start, size_t size) block = memblock_alloc(k_end - k_start, PAGE_SIZE); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pmd_t *pmd = pmd_ptr_k(k_cur); void *va = block + k_cur - k_start; pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); @@ -102,7 +102,7 @@ static void __init kasan_remap_early_shadow_ro(void) kasan_populate_pte(kasan_early_shadow_pte, prot); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pmd_t *pmd = pmd_ptr_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) @@ -202,7 +202,7 @@ void __init kasan_early_init(void) unsigned long addr = KASAN_SHADOW_START; unsigned long end = KASAN_SHADOW_END; unsigned long next; - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), 
addr), addr); + pmd_t *pmd = pmd_ptr_k(addr); BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ef7b1119b2e2..b7325bb4c890 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -69,8 +69,7 @@ EXPORT_SYMBOL(kmap_prot); static inline pte_t *virt_to_kpte(unsigned long vaddr) { - return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), - vaddr), vaddr), vaddr); + return pte_offset_kernel(pmd_ptr_k(vaddr), vaddr); } #endif diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index f348104eb461..82862723ab42 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -104,7 +104,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_ptr_k(v); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); @@ -119,7 +119,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_ptr_k(v); *pmdp = __pmd(val); v += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5fb90edd865e..d90c166bb6e5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -63,7 +63,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) int err = -ENOMEM; /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); + pd = pmd_ptr_k(va); /* Use middle 10 bits of VA to index the second-level map */ if (likely(slab_is_available())) pg = pte_alloc_kernel(pd, va); -- cgit v1.2.3-59-g8ed1b From 2efc7c085f05870eda6f29ac71eeb83f3bd54415 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 
Jan 2020 08:25:26 +0000 Subject: powerpc/32: drop get_pteptr() Commit 8d30c14cab30 ("powerpc/mm: Rework I$/D$ coherency (v3)") and commit 90ac19a8b21b ("[POWERPC] Abolish iopa(), mm_ptov(), io_block_mapping() from arch/powerpc") removed the use of get_pteptr() outside of mm/pgtable_32.c In mm/pgtable_32.c, the only user of get_pteptr() is change_page_attr() which operates on kernel context and on lowmem pages only. Make virt_to_kpte() available outside of mm/mem.c and use it instead of get_pteptr(), and drop get_pteptr() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/788378c6c3ba5c5298caab7c7f95e6c3c88244b8.1578558199.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/pgtable.h | 5 +++++ arch/powerpc/mm/mem.c | 5 ----- arch/powerpc/mm/pgtable_32.c | 39 ++------------------------------------ 3 files changed, 7 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 22bf7bb666a7..b80bfd41828d 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -51,6 +51,11 @@ static inline pmd_t *pmd_ptr_k(unsigned long va) { return pmd_offset(pud_offset(pgd_offset_k(va), va), va); } + +static inline pte_t *virt_to_kpte(unsigned long vaddr) +{ + return pte_offset_kernel(pmd_ptr_k(vaddr), vaddr); +} #endif #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b7325bb4c890..4f852d2a62f5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -66,11 +66,6 @@ pte_t *kmap_pte; EXPORT_SYMBOL(kmap_pte); pgprot_t kmap_prot; EXPORT_SYMBOL(kmap_prot); - -static inline pte_t *virt_to_kpte(unsigned long vaddr) -{ - return pte_offset_kernel(pmd_ptr_k(vaddr), vaddr); -} #endif pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index d90c166bb6e5..f62de06e3d07 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ 
b/arch/powerpc/mm/pgtable_32.c @@ -121,44 +121,9 @@ void __init mapin_ram(void) } } -/* Scan the real Linux page tables and return a PTE pointer for - * a virtual address in a context. - * Returns true (1) if PTE was found, zero otherwise. The pointer to - * the PTE pointer is unmodified if PTE is not found. - */ -static int -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int retval = 0; - - pgd = pgd_offset(mm, addr & PAGE_MASK); - if (pgd) { - pud = pud_offset(pgd, addr & PAGE_MASK); - if (pud && pud_present(*pud)) { - pmd = pmd_offset(pud, addr & PAGE_MASK); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd, addr & PAGE_MASK); - if (pte) { - retval = 1; - *ptep = pte; - if (pmdp) - *pmdp = pmd; - /* XXX caller needs to do pte_unmap, yuck */ - } - } - } - } - return(retval); -} - static int __change_page_attr_noflush(struct page *page, pgprot_t prot) { pte_t *kpte; - pmd_t *kpmd; unsigned long address; BUG_ON(PageHighMem(page)); @@ -166,10 +131,10 @@ static int __change_page_attr_noflush(struct page *page, pgprot_t prot) if (v_block_mapped(address)) return 0; - if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) + kpte = virt_to_kpte(address); + if (!kpte) return -EINVAL; __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); - pte_unmap(kpte); return 0; } -- cgit v1.2.3-59-g8ed1b From 365ad0b60d944050d61252e123e6a8b2c3950398 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 Feb 2020 15:09:04 +0100 Subject: powerpc/83xx: Fix some typo in some warning message "couldn;t" should be "couldn't". 
Signed-off-by: Christophe JAILLET Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200208140904.7521-1-christophe.jaillet@wanadoo.fr --- arch/powerpc/platforms/83xx/km83xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index ada42f03915a..306be75faec7 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -53,13 +53,13 @@ static void quirk_mpc8360e_qe_enet10(void) np_par = of_find_node_by_name(NULL, "par_io"); if (np_par == NULL) { - pr_warn("%s couldn;t find par_io node\n", __func__); + pr_warn("%s couldn't find par_io node\n", __func__); return; } /* Map Parallel I/O ports registers */ ret = of_address_to_resource(np_par, 0, &res); if (ret) { - pr_warn("%s couldn;t map par_io registers\n", __func__); + pr_warn("%s couldn't map par_io registers\n", __func__); return; } -- cgit v1.2.3-59-g8ed1b From 88654d5b4476a438bd86b257dc4d70bb32266f1b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 Feb 2020 15:09:20 +0100 Subject: powerpc/83xx: Add some error handling in 'quirk_mpc8360e_qe_enet10()' In some error handling path, we should call "of_node_put(np_par)" or some resource may be leaking in case of error. 
Fixes: 8159df72d43e ("83xx: add support for the kmeter1 board.") Signed-off-by: Christophe JAILLET Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200208140920.7652-1-christophe.jaillet@wanadoo.fr --- arch/powerpc/platforms/83xx/km83xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 306be75faec7..bcdc2c203ec9 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -60,10 +60,12 @@ static void quirk_mpc8360e_qe_enet10(void) ret = of_address_to_resource(np_par, 0, &res); if (ret) { pr_warn("%s couldn't map par_io registers\n", __func__); - return; + goto out; } base = ioremap(res.start, resource_size(&res)); + if (!base) + goto out; /* * set output delay adjustments to default values according @@ -111,6 +113,7 @@ static void quirk_mpc8360e_qe_enet10(void) setbits32((base + 0xac), 0x0000c000); } iounmap(base); +out: of_node_put(np_par); } -- cgit v1.2.3-59-g8ed1b From 860286cf33963fa8a0fe542995bdec2df5cb3abb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 11:58:56 +0100 Subject: powerpc/kernel: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. 
Signed-off-by: Greg Kroah-Hartman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200209105901.1620958-1-gregkh@linuxfoundation.org --- arch/powerpc/kernel/fadump.c | 10 +++------- arch/powerpc/kernel/setup-common.c | 3 +-- arch/powerpc/kernel/traps.c | 25 +++++-------------------- 3 files changed, 9 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 265b4aa72252..59e60a9a9f5c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1455,7 +1455,6 @@ DEFINE_SHOW_ATTRIBUTE(fadump_region); static void fadump_init_files(void) { - struct dentry *debugfs_file; int rc = 0; fadump_kobj = kobject_create_and_add("fadump", kernel_kobj); @@ -1463,12 +1462,9 @@ static void fadump_init_files(void) pr_err("failed to create fadump kobject\n"); return; } - debugfs_file = debugfs_create_file("fadump_region", 0444, - powerpc_debugfs_root, NULL, - &fadump_region_fops); - if (!debugfs_file) - printk(KERN_ERR "fadump: unable to create debugfs file" - " fadump_region\n"); + + debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL, + &fadump_region_fops); if (fw_dump.dump_active) { rc = sysfs_create_file(fadump_kobj, &release_attr.attr); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 7f8c890360fe..f9c0d888ce8a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -787,8 +787,7 @@ EXPORT_SYMBOL(powerpc_debugfs_root); static int powerpc_debugfs_init(void) { powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL); - - return powerpc_debugfs_root == NULL; + return 0; } arch_initcall(powerpc_debugfs_init); #endif diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 82a3438300fd..3fca22276bb1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2278,35 +2278,20 @@ void ppc_warn_emulated_print(const char *type) static int __init 
ppc_warn_emulated_init(void) { - struct dentry *dir, *d; + struct dentry *dir; unsigned int i; struct ppc_emulated_entry *entries = (void *)&ppc_emulated; - if (!powerpc_debugfs_root) - return -ENODEV; - dir = debugfs_create_dir("emulated_instructions", powerpc_debugfs_root); - if (!dir) - return -ENOMEM; - d = debugfs_create_u32("do_warn", 0644, dir, - &ppc_warn_emulated); - if (!d) - goto fail; + debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated); - for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) { - d = debugfs_create_u32(entries[i].name, 0644, dir, - (u32 *)&entries[i].val.counter); - if (!d) - goto fail; - } + for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) + debugfs_create_u32(entries[i].name, 0644, dir, + (u32 *)&entries[i].val.counter); return 0; - -fail: - debugfs_remove_recursive(dir); - return -ENOMEM; } device_initcall(ppc_warn_emulated_init); -- cgit v1.2.3-59-g8ed1b From c4fd527f52ecb135018655c7f56f87800872c5bc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 11:58:57 +0100 Subject: powerpc/kvm: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Because of this cleanup, we get to remove a few fields in struct kvm_arch that are now unused. 
Signed-off-by: Greg Kroah-Hartman [mpe: Fix build error in kvm/timing.c, adapt kvmppc_remove_cpu_debugfs()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200209105901.1620958-2-gregkh@linuxfoundation.org --- arch/powerpc/include/asm/kvm_host.h | 3 --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 5 ++--- arch/powerpc/kvm/book3s_64_mmu_radix.c | 5 ++--- arch/powerpc/kvm/book3s_hv.c | 9 ++------- arch/powerpc/kvm/timing.c | 17 ++++------------- 5 files changed, 10 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6e8b8ffd06ad..877f8aa2bc1e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -308,8 +308,6 @@ struct kvm_arch { pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; - struct dentry *htab_dentry; - struct dentry *radix_dentry; struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -830,7 +828,6 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ struct dentry *debugfs_dir; - struct dentry *debugfs_timings; #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6c372f5c61b6..8b4eac0c9dcd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2138,9 +2138,8 @@ static const struct file_operations debugfs_htab_fops = { void kvmppc_mmu_debugfs_init(struct kvm *kvm) { - kvm->arch.htab_dentry = debugfs_create_file("htab", 0400, - kvm->arch.debugfs_dir, kvm, - &debugfs_htab_fops); + debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm, + &debugfs_htab_fops); } void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 803940d79b73..1d75ed684b53 100644 --- 
a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1376,9 +1376,8 @@ static const struct file_operations debugfs_radix_fops = { void kvmhv_radix_debugfs_init(struct kvm *kvm) { - kvm->arch.radix_dentry = debugfs_create_file("radix", 0400, - kvm->arch.debugfs_dir, kvm, - &debugfs_radix_fops); + debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm, + &debugfs_radix_fops); } int kvmppc_radix_init(void) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2cefd071b848..33be4d93248a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2258,14 +2258,9 @@ static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) struct kvm *kvm = vcpu->kvm; snprintf(buf, sizeof(buf), "vcpu%u", id); - if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir)) - return; vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); - if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir)) - return; - vcpu->arch.debugfs_timings = - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, - vcpu, &debugfs_timings_ops); + debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu, + &debugfs_timings_ops); } #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index bfe4f106cffc..ba56a5cbba97 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -211,23 +211,14 @@ void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, - kvm_debugfs_dir, vcpu, - &kvmppc_exit_timing_fops); - - if (!debugfs_file) { - printk(KERN_ERR"%s: error creating debugfs file %s\n", - __func__, dbg_fname); - return; - } + debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir, + vcpu, &kvmppc_exit_timing_fops); vcpu->arch.debugfs_exit_timing = debugfs_file; } void 
kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) { - if (vcpu->arch.debugfs_exit_timing) { - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; - } + debugfs_remove(vcpu->arch.debugfs_exit_timing); + vcpu->arch.debugfs_exit_timing = NULL; } -- cgit v1.2.3-59-g8ed1b From 08f6a7974ab949d43c2584f966db458d255b2b0a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 11:58:58 +0100 Subject: powerpc/mm: book3s64: hash_utils: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200209105901.1620958-3-gregkh@linuxfoundation.org --- arch/powerpc/mm/book3s64/hash_utils.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 523d4d39d11e..7e5714a69a58 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -2018,11 +2018,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n") static int __init hash64_debugfs(void) { - if (!debugfs_create_file_unsafe("hpt_order", 0600, powerpc_debugfs_root, - NULL, &fops_hpt_order)) { - pr_err("lpar: unable to create hpt_order debugsfs file\n"); - } - + debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, NULL, + &fops_hpt_order); return 0; } machine_device_initcall(pseries, hash64_debugfs); -- cgit v1.2.3-59-g8ed1b From f3c05201950a71c1b8b4f11828303f76c5dd0944 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 11:58:59 +0100 Subject: powerpc/mm: ptdump: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. 
The function can work or not, but the code logic should never do something different based on this. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200209105901.1620958-4-gregkh@linuxfoundation.org --- arch/powerpc/mm/ptdump/bats.c | 8 +++----- arch/powerpc/mm/ptdump/hashpagetable.c | 7 ++----- arch/powerpc/mm/ptdump/ptdump.c | 8 +++----- arch/powerpc/mm/ptdump/segment_regs.c | 8 +++----- 4 files changed, 11 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index 4154feac1da3..d3a5d6b318d1 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -164,10 +164,8 @@ static const struct file_operations bats_fops = { static int __init bats_init(void) { - struct dentry *debugfs_file; - - debugfs_file = debugfs_create_file("block_address_translation", 0400, - powerpc_debugfs_root, NULL, &bats_fops); - return debugfs_file ? 0 : -ENOMEM; + debugfs_create_file("block_address_translation", 0400, + powerpc_debugfs_root, NULL, &bats_fops); + return 0; } device_initcall(bats_init); diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index a07278027c6f..b6ed9578382f 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -527,13 +527,10 @@ static const struct file_operations ptdump_fops = { static int ptdump_init(void) { - struct dentry *debugfs_file; - if (!radix_enabled()) { populate_markers(); - debugfs_file = debugfs_create_file("kernel_hash_pagetable", - 0400, NULL, NULL, &ptdump_fops); - return debugfs_file ? 
0 : -ENOMEM; + debugfs_create_file("kernel_hash_pagetable", 0400, NULL, NULL, + &ptdump_fops); } return 0; } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 206156255247..d92bb8ea229c 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -417,12 +417,10 @@ void ptdump_check_wx(void) static int ptdump_init(void) { - struct dentry *debugfs_file; - populate_markers(); build_pgtable_complete_mask(); - debugfs_file = debugfs_create_file("kernel_page_tables", 0400, NULL, - NULL, &ptdump_fops); - return debugfs_file ? 0 : -ENOMEM; + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return 0; } device_initcall(ptdump_init); diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index 501843664bb9..dde2fe8de4b2 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -55,10 +55,8 @@ static const struct file_operations sr_fops = { static int __init sr_init(void) { - struct dentry *debugfs_file; - - debugfs_file = debugfs_create_file("segment_registers", 0400, - powerpc_debugfs_root, NULL, &sr_fops); - return debugfs_file ? 0 : -ENOMEM; + debugfs_create_file("segment_registers", 0400, powerpc_debugfs_root, + NULL, &sr_fops); + return 0; } device_initcall(sr_init); -- cgit v1.2.3-59-g8ed1b From e04906aa1fbaf435b9a1f2ec6bbe7971a3ef8e42 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 11:59:00 +0100 Subject: powerpc/cell/axon_msi: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. 
Signed-off-by: Greg Kroah-Hartman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200209105901.1620958-5-gregkh@linuxfoundation.org --- arch/powerpc/platforms/cell/axon_msi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 57c4e0e86c88..ca2555b8a0c2 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -480,10 +480,6 @@ void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); - if (!debugfs_create_file(name, 0600, powerpc_debugfs_root, - msic, &fops_msic)) { - pr_devel("axon_msi: debugfs_create_file failed!\n"); - return; - } + debugfs_create_file(name, 0600, powerpc_debugfs_root, msic, &fops_msic); } #endif /* DEBUG */ -- cgit v1.2.3-59-g8ed1b From f344f0ab993987ae29cb39cc52054d7346db082f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 11:59:01 +0100 Subject: powerpc/powernv: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. 
Signed-off-by: Greg Kroah-Hartman Reviewed-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200209105901.1620958-6-gregkh@linuxfoundation.org --- arch/powerpc/platforms/powernv/memtrace.c | 7 ------ arch/powerpc/platforms/powernv/opal-imc.c | 24 ++++++------------- arch/powerpc/platforms/powernv/pci-ioda.c | 5 ---- arch/powerpc/platforms/powernv/vas-debug.c | 37 +++--------------------------- 4 files changed, 10 insertions(+), 63 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index eb2e75dac369..d6d64f8718e6 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -187,11 +187,6 @@ static int memtrace_init_debugfs(void) snprintf(ent->name, 16, "%08x", ent->nid); dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); - if (!dir) { - pr_err("Failed to create debugfs directory for node %d\n", - ent->nid); - return -1; - } ent->dir = dir; debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); @@ -314,8 +309,6 @@ static int memtrace_init(void) { memtrace_debugfs_dir = debugfs_create_dir("memtrace", powerpc_debugfs_root); - if (!memtrace_debugfs_dir) - return -1; debugfs_create_file("enable", 0600, memtrace_debugfs_dir, NULL, &memtrace_init_fops); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 000b350d4060..968b9a4d1cd9 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -35,11 +35,10 @@ static int imc_mem_set(void *data, u64 val) } DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n"); -static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode, - struct dentry *parent, u64 *value) +static void imc_debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) { - return debugfs_create_file_unsafe(name, mode, parent, - value, 
&fops_imc_x64); + debugfs_create_file_unsafe(name, mode, parent, value, &fops_imc_x64); } /* @@ -59,9 +58,6 @@ static void export_imc_mode_and_cmd(struct device_node *node, imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); - if (!imc_debugfs_parent) - return; - if (of_property_read_u32(node, "cb_offset", &cb_offset)) cb_offset = IMC_CNTL_BLK_OFFSET; @@ -69,21 +65,15 @@ static void export_imc_mode_and_cmd(struct device_node *node, loc = (u64)(ptr->vbase) + cb_offset; imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET); sprintf(mode, "imc_mode_%d", (u32)(ptr->id)); - if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, - imc_mode_addr)) - goto err; + imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, + imc_mode_addr); imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET); sprintf(cmd, "imc_cmd_%d", (u32)(ptr->id)); - if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, - imc_cmd_addr)) - goto err; + imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, + imc_cmd_addr); ptr++; } - return; - -err: - debugfs_remove_recursive(imc_debugfs_parent); } /* diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 22c22cd7bd82..57d3a6af1d52 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3174,11 +3174,6 @@ static void pnv_pci_ioda_create_dbgfs(void) sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); - if (!phb->dbgfs) { - pr_warn("%s: Error on creating debugfs on PHB#%x\n", - __func__, hose->global_number); - continue; - } debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs, phb, &pnv_pci_diag_data_fops); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 09e63df53c30..44035a3d6414 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -115,7 +115,7 @@ void 
vas_window_free_dbgdir(struct vas_window *window) void vas_window_init_dbgdir(struct vas_window *window) { - struct dentry *f, *d; + struct dentry *d; if (!window->vinst->dbgdir) return; @@ -127,28 +127,10 @@ void vas_window_init_dbgdir(struct vas_window *window) snprintf(window->dbgname, 16, "w%d", window->winid); d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir); - if (IS_ERR(d)) - goto free_name; - window->dbgdir = d; - f = debugfs_create_file("info", 0444, d, window, &info_fops); - if (IS_ERR(f)) - goto remove_dir; - - f = debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); - if (IS_ERR(f)) - goto remove_dir; - - return; - -remove_dir: - debugfs_remove_recursive(window->dbgdir); - window->dbgdir = NULL; - -free_name: - kfree(window->dbgname); - window->dbgname = NULL; + debugfs_create_file("info", 0444, d, window, &info_fops); + debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); } void vas_instance_init_dbgdir(struct vas_instance *vinst) @@ -156,8 +138,6 @@ void vas_instance_init_dbgdir(struct vas_instance *vinst) struct dentry *d; vas_init_dbgdir(); - if (!vas_debugfs) - return; vinst->dbgname = kzalloc(16, GFP_KERNEL); if (!vinst->dbgname) @@ -166,16 +146,7 @@ void vas_instance_init_dbgdir(struct vas_instance *vinst) snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id); d = debugfs_create_dir(vinst->dbgname, vas_debugfs); - if (IS_ERR(d)) - goto free_name; - vinst->dbgdir = d; - return; - -free_name: - kfree(vinst->dbgname); - vinst->dbgname = NULL; - vinst->dbgdir = NULL; } /* @@ -191,6 +162,4 @@ void vas_init_dbgdir(void) first_time = false; vas_debugfs = debugfs_create_dir("vas", NULL); - if (IS_ERR(vas_debugfs)) - vas_debugfs = NULL; } -- cgit v1.2.3-59-g8ed1b From c4b78169e3667413184c9a20e11b5832288a109f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 23 Dec 2019 17:03:51 +1100 Subject: powerpc/book3s64: Fix error handling in mm_iommu_do_alloc() The last jump to free_exit in mm_iommu_do_alloc() happens after page 
pointers in struct mm_iommu_table_group_mem_t were already converted to physical addresses. Thus calling put_page() on these physical addresses will likely crash. This moves the loop which calculates the pageshift and converts page struct pointers to physical addresses later after the point when we cannot fail; thus eliminating the need to convert pointers back. Fixes: eb9d7a62c386 ("powerpc/mm_iommu: Fix potential deadlock") Reported-by: Jan Kara Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191223060351.26359-1-aik@ozlabs.ru --- arch/powerpc/mm/book3s64/iommu_api.c | 39 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index eba73ebd8ae5..fa05bbd1f682 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -121,24 +121,6 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, goto free_exit; } - pageshift = PAGE_SHIFT; - for (i = 0; i < entries; ++i) { - struct page *page = mem->hpages[i]; - - /* - * Allow to use larger than 64k IOMMU pages. Only do that - * if we are backed by hugetlb. - */ - if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) - pageshift = page_shift(compound_head(page)); - mem->pageshift = min(mem->pageshift, pageshift); - /* - * We don't need struct page reference any more, switch - * to physical address. - */ - mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; - } - good_exit: atomic64_set(&mem->mapped, 1); mem->used = 1; @@ -158,6 +140,27 @@ good_exit: } } + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { + /* + * Allow to use larger than 64k IOMMU pages. Only do that + * if we are backed by hugetlb. Skip device memory as it is not + * backed with page structs. 
+ */ + pageshift = PAGE_SHIFT; + for (i = 0; i < entries; ++i) { + struct page *page = mem->hpages[i]; + + if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) + pageshift = page_shift(compound_head(page)); + mem->pageshift = min(mem->pageshift, pageshift); + /* + * We don't need struct page reference any more, switch + * to physical address. + */ + mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; + } + } + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); mutex_unlock(&mem_list_mutex); -- cgit v1.2.3-59-g8ed1b From 6453f9ed9d4e4b4cdf201bf34bf460c436bf50ea Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Feb 2020 09:41:35 +0000 Subject: powerpc/mm: Don't kmap_atomic() in pte_offset_map() on PPC32 On PPC32, pte_offset_map() does a kmap_atomic() in order to support page tables allocated in high memory, just like ARM and x86/32. But since at least 2008 and commit 8054a3428fbe ("powerpc: Remove dead CONFIG_HIGHPTE"), page tables are never allocated in high memory. When the page is in low mem, kmap_atomic() just returns the page address but still disable preemption and pagefault. And it is not an inlined function, so we suffer function call for no reason. Make pte_offset_map() the same as pte_offset_kernel() and make pte_unmap() void, in the same way as PPC64 which doesn't have HIGHMEM. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/03c97f0f6b3790d164822563be80f2fd4713a955.1581932480.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/32/pgtable.h | 6 ++---- arch/powerpc/include/asm/nohash/32/pgtable.h | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 5b39c11e884a..7549393c4c43 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -366,10 +366,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, addr) \ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) -#define pte_offset_map(dir, addr) \ - ((pte_t *)(kmap_atomic(pmd_page(*(dir))) + \ - (pmd_page_vaddr(*(dir)) & ~PAGE_MASK)) + pte_index(addr)) -#define pte_unmap(pte) kunmap_atomic(pte) +#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) +static inline void pte_unmap(pte_t *pte) { } /* * Encode and decode a swap entry. diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 60c4d829152e..b04ba257fddb 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -372,10 +372,8 @@ static inline int pte_young(pte_t pte) #define pte_offset_kernel(dir, addr) \ (pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \ pte_index(addr)) -#define pte_offset_map(dir, addr) \ - ((pte_t *)(kmap_atomic(pmd_page(*(dir))) + \ - (pmd_page_vaddr(*(dir)) & ~PAGE_MASK)) + pte_index(addr)) -#define pte_unmap(pte) kunmap_atomic(pte) +#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) +static inline void pte_unmap(pte_t *pte) { } /* * Encode and decode a swap entry. 
-- cgit v1.2.3-59-g8ed1b From d42c6d0f8d004c3661dde3c376ed637e9f292c22 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Feb 2020 11:04:34 +1100 Subject: powerpc/Makefile: Mark phony targets as PHONY Some of our phony targets are not marked as such. This can lead to confusing errors, eg: $ make clean $ touch install $ make install make: 'install' is up to date. $ Fix it by adding them to the PHONY variable which is marked phony in the top-level Makefile, or in scripts/Makefile.build for the boot Makefile. Suggested-by: Masahiro Yamada Signed-off-by: Michael Ellerman Reviewed-by: Masahiro Yamada Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200219000434.15872-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 6 ++++++ arch/powerpc/boot/Makefile | 2 ++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f35730548e42..cbe5ca4f0ee5 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -298,6 +298,7 @@ $(BOOT_TARGETS2): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) +PHONY += bootwrapper_install bootwrapper_install: $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) @@ -403,9 +404,11 @@ define archhelp @echo ' (minus the .dts extension).' 
endef +PHONY += install install: $(Q)$(MAKE) $(build)=$(boot) install +PHONY += vdso_install vdso_install: ifdef CONFIG_PPC64 $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ @@ -425,6 +428,7 @@ archheaders: ifdef CONFIG_STACKPROTECTOR prepare: stack_protector_prepare +PHONY += stack_protector_prepare stack_protector_prepare: prepare0 ifdef CONFIG_PPC64 $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h)) @@ -436,10 +440,12 @@ endif ifdef CONFIG_SMP prepare: task_cpu_prepare +PHONY += task_cpu_prepare task_cpu_prepare: prepare0 $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h)) endif +PHONY += checkbin # Check toolchain versions: # - gcc-4.6 is the minimum kernel-wide version so nothing required. checkbin: diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 0556bf4fc9e9..c53a1b8bba8b 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -445,6 +445,8 @@ install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ +PHONY += install zInstall + # anything not in $(targets) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ -- cgit v1.2.3-59-g8ed1b From 16985f2d25095899685952296f128a71f0aff05c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 17 Feb 2020 13:48:32 +1100 Subject: powerpc/powernv: Treat an empty reboot string as default Treat an empty reboot cmd string the same as a NULL string. This squashes a spurious unsupported reboot message that sometimes gets out when using xmon. 
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200217024833.30580-1-oohall@gmail.com --- arch/powerpc/platforms/powernv/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 11fdae81b5dd..a8fe630cf7cc 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -229,7 +229,7 @@ static void __noreturn pnv_restart(char *cmd) pnv_prepare_going_down(); do { - if (!cmd) + if (!cmd || !strlen(cmd)) rc = opal_cec_reboot(); else if (strcmp(cmd, "full") == 0) rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL); -- cgit v1.2.3-59-g8ed1b From 672e480aa21023fc8e4b6ab8635d8898822b97e7 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 17 Feb 2020 13:48:33 +1100 Subject: powerpc/powernv: Add explicit fast-reboot support Add a way to manually invoke a fast-reboot rather than setting the NVRAM flag. The idea is to allow userspace to invoke a fast-reboot using the optional string argument to the reboot() system call, or using the xmon zr command so we don't need to leave persistent changes around on a system to use the feature.
Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200217024833.30580-2-oohall@gmail.com --- arch/powerpc/include/asm/opal-api.h | 1 + arch/powerpc/platforms/powernv/setup.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index c1f25a760eb1..1dffa3cb16ba 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1067,6 +1067,7 @@ enum { OPAL_REBOOT_PLATFORM_ERROR = 1, OPAL_REBOOT_FULL_IPL = 2, OPAL_REBOOT_MPIPL = 3, + OPAL_REBOOT_FAST = 4, }; /* Argument to OPAL_PCI_TCE_KILL */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index a8fe630cf7cc..3bc188da82ba 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -237,6 +237,8 @@ static void __noreturn pnv_restart(char *cmd) rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL); else if (strcmp(cmd, "error") == 0) rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL); + else if (strcmp(cmd, "fast") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_FAST, NULL); else rc = OPAL_UNSUPPORTED; -- cgit v1.2.3-59-g8ed1b From fcdb524d440d6326c286006e16f252b40ba4fd6a Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Fri, 14 Feb 2020 13:36:05 +0530 Subject: powerpc/kernel/sysfs: Refactor current sysfs.c An attempt to refactor the current sysfs.c file. To start with, a big chunk of macro #defines and dscr functions are moved to the start of the file. Secondly, HAS_ #define macros are cleaned up based on CONFIG_ options. Finally, new HAS_ macros are added: 1. HAS_PPC_PA6T (for PA6T) to separate out non-PMU SPRs. 2. HAS_PPC_PMC56 to separate out PMC SPRs from HAS_PPC_PMC_CLASSIC which come under CONFIG_PPC64.
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200214080606.26872-1-kjain@linux.ibm.com --- arch/powerpc/kernel/sysfs.c | 375 +++++++++++++++++++++++--------------------- 1 file changed, 200 insertions(+), 175 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 80a676da11cb..74da5ebf088e 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -87,6 +87,155 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay); #endif /* CONFIG_PPC64 */ +#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \ +static void read_##NAME(void *val) \ +{ \ + *(unsigned long *)val = mfspr(ADDRESS); \ +} \ +static void write_##NAME(void *val) \ +{ \ + EXTRA; \ + mtspr(ADDRESS, *(unsigned long *)val); \ +} + +#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \ +static ssize_t show_##NAME(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ + unsigned long val; \ + smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ + return sprintf(buf, "%lx\n", val); \ +} \ +static ssize_t __used \ + store_##NAME(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct cpu *cpu = container_of(dev, struct cpu, dev); \ + unsigned long val; \ + int ret = sscanf(buf, "%lx", &val); \ + if (ret != 1) \ + return -EINVAL; \ + smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ + return count; \ +} + +#define SYSFS_PMCSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) +#define SYSFS_SPRSETUP(NAME, ADDRESS) \ + __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) + +#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \ + __SYSFS_SPRSETUP_SHOW_STORE(NAME) + +#ifdef CONFIG_PPC64 + +/* + * This is the system wide DSCR register default value. 
Any + * change to this default value through the sysfs interface + * will update all per cpu DSCR default values across the + * system stored in their respective PACA structures. + */ +static unsigned long dscr_default; + +/** + * read_dscr() - Fetch the cpu specific DSCR default + * @val: Returned cpu specific DSCR default value + * + * This function returns the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ +static void read_dscr(void *val) +{ + *(unsigned long *)val = get_paca()->dscr_default; +} + + +/** + * write_dscr() - Update the cpu specific DSCR default + * @val: New cpu specific DSCR default value to update + * + * This function updates the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ +static void write_dscr(void *val) +{ + get_paca()->dscr_default = *(unsigned long *)val; + if (!current->thread.dscr_inherit) { + current->thread.dscr = *(unsigned long *)val; + mtspr(SPRN_DSCR, *(unsigned long *)val); + } +} + +SYSFS_SPRSETUP_SHOW_STORE(dscr); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); + +static void add_write_permission_dev_attr(struct device_attribute *attr) +{ + attr->attr.mode |= 0200; +} + +/** + * show_dscr_default() - Fetch the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * + * This function returns the system wide DSCR default value. + */ +static ssize_t show_dscr_default(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + +/** + * store_dscr_default() - Update the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * @count: Size of the update + * + * This function updates the system wide DSCR default value. 
+ */ +static ssize_t __used store_dscr_default(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int ret = 0; + + ret = sscanf(buf, "%lx", &val); + if (ret != 1) + return -EINVAL; + dscr_default = val; + + on_each_cpu(write_dscr, &val, 1); + + return count; +} + +static DEVICE_ATTR(dscr_default, 0600, + show_dscr_default, store_dscr_default); + +static void sysfs_create_dscr_default(void) +{ + if (cpu_has_feature(CPU_FTR_DSCR)) { + int err = 0; + int cpu; + + dscr_default = spr_default_dscr; + for_each_possible_cpu(cpu) + paca_ptrs[cpu]->dscr_default = dscr_default; + + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); + } +} +#endif /* CONFIG_PPC64 */ + #ifdef CONFIG_PPC_FSL_BOOK3E #define MAX_BIT 63 @@ -407,84 +556,33 @@ void ppc_enable_pmcs(void) } EXPORT_SYMBOL(ppc_enable_pmcs); -#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \ -static void read_##NAME(void *val) \ -{ \ - *(unsigned long *)val = mfspr(ADDRESS); \ -} \ -static void write_##NAME(void *val) \ -{ \ - EXTRA; \ - mtspr(ADDRESS, *(unsigned long *)val); \ -} -#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \ -static ssize_t show_##NAME(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, dev); \ - unsigned long val; \ - smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \ - return sprintf(buf, "%lx\n", val); \ -} \ -static ssize_t __used \ - store_##NAME(struct device *dev, struct device_attribute *attr, \ - const char *buf, size_t count) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, dev); \ - unsigned long val; \ - int ret = sscanf(buf, "%lx", &val); \ - if (ret != 1) \ - return -EINVAL; \ - smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \ - return count; \ -} - -#define SYSFS_PMCSETUP(NAME, ADDRESS) \ - __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \ - __SYSFS_SPRSETUP_SHOW_STORE(NAME) 
-#define SYSFS_SPRSETUP(NAME, ADDRESS) \ - __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \ - __SYSFS_SPRSETUP_SHOW_STORE(NAME) - -#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \ - __SYSFS_SPRSETUP_SHOW_STORE(NAME) /* Let's define all possible registers, we'll only hook up the ones * that are implemented on the current processor */ -#if defined(CONFIG_PPC64) +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32) #define HAS_PPC_PMC_CLASSIC 1 #define HAS_PPC_PMC_IBM 1 -#define HAS_PPC_PMC_PA6T 1 -#elif defined(CONFIG_PPC_BOOK3S_32) -#define HAS_PPC_PMC_CLASSIC 1 -#define HAS_PPC_PMC_IBM 1 -#define HAS_PPC_PMC_G4 1 #endif +#ifdef CONFIG_PPC64 +#define HAS_PPC_PMC_PA6T 1 +#define HAS_PPC_PMC56 1 +#endif -#ifdef HAS_PPC_PMC_CLASSIC -SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); -SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); -SYSFS_PMCSETUP(pmc1, SPRN_PMC1); -SYSFS_PMCSETUP(pmc2, SPRN_PMC2); -SYSFS_PMCSETUP(pmc3, SPRN_PMC3); -SYSFS_PMCSETUP(pmc4, SPRN_PMC4); -SYSFS_PMCSETUP(pmc5, SPRN_PMC5); -SYSFS_PMCSETUP(pmc6, SPRN_PMC6); - -#ifdef HAS_PPC_PMC_G4 -SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2); +#ifdef CONFIG_PPC_BOOK3S_32 +#define HAS_PPC_PMC_G4 1 #endif +#if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC) +#define HAS_PPC_PA6T +#endif +/* + * SPRs which are not related to PMU. + */ #ifdef CONFIG_PPC64 -SYSFS_PMCSETUP(pmc7, SPRN_PMC7); -SYSFS_PMCSETUP(pmc8, SPRN_PMC8); - -SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_SPRSETUP(purr, SPRN_PURR); SYSFS_SPRSETUP(spurr, SPRN_SPURR); SYSFS_SPRSETUP(pir, SPRN_PIR); @@ -495,115 +593,38 @@ SYSFS_SPRSETUP(tscr, SPRN_TSCR); enable write when needed with a separate function. Lets be conservative and default to pseries. 
*/ -static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr); +#endif /* CONFIG_PPC64 */ -/* - * This is the system wide DSCR register default value. Any - * change to this default value through the sysfs interface - * will update all per cpu DSCR default values across the - * system stored in their respective PACA structures. - */ -static unsigned long dscr_default; - -/** - * read_dscr() - Fetch the cpu specific DSCR default - * @val: Returned cpu specific DSCR default value - * - * This function returns the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. - */ -static void read_dscr(void *val) -{ - *(unsigned long *)val = get_paca()->dscr_default; -} - - -/** - * write_dscr() - Update the cpu specific DSCR default - * @val: New cpu specific DSCR default value to update - * - * This function updates the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. - */ -static void write_dscr(void *val) -{ - get_paca()->dscr_default = *(unsigned long *)val; - if (!current->thread.dscr_inherit) { - current->thread.dscr = *(unsigned long *)val; - mtspr(SPRN_DSCR, *(unsigned long *)val); - } -} - -SYSFS_SPRSETUP_SHOW_STORE(dscr); -static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); - -static void add_write_permission_dev_attr(struct device_attribute *attr) -{ - attr->attr.mode |= 0200; -} - -/** - * show_dscr_default() - Fetch the system wide DSCR default - * @dev: Device structure - * @attr: Device attribute structure - * @buf: Interface buffer - * - * This function returns the system wide DSCR default value. 
- */ -static ssize_t show_dscr_default(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "%lx\n", dscr_default); -} - -/** - * store_dscr_default() - Update the system wide DSCR default - * @dev: Device structure - * @attr: Device attribute structure - * @buf: Interface buffer - * @count: Size of the update - * - * This function updates the system wide DSCR default value. - */ -static ssize_t __used store_dscr_default(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) -{ - unsigned long val; - int ret = 0; - - ret = sscanf(buf, "%lx", &val); - if (ret != 1) - return -EINVAL; - dscr_default = val; +#ifdef HAS_PPC_PMC_CLASSIC +SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); +SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); +SYSFS_PMCSETUP(pmc1, SPRN_PMC1); +SYSFS_PMCSETUP(pmc2, SPRN_PMC2); +SYSFS_PMCSETUP(pmc3, SPRN_PMC3); +SYSFS_PMCSETUP(pmc4, SPRN_PMC4); +SYSFS_PMCSETUP(pmc5, SPRN_PMC5); +SYSFS_PMCSETUP(pmc6, SPRN_PMC6); +#endif - on_each_cpu(write_dscr, &val, 1); +#ifdef HAS_PPC_PMC_G4 +SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2); +#endif - return count; -} +#ifdef HAS_PPC_PMC56 +SYSFS_PMCSETUP(pmc7, SPRN_PMC7); +SYSFS_PMCSETUP(pmc8, SPRN_PMC8); -static DEVICE_ATTR(dscr_default, 0600, - show_dscr_default, store_dscr_default); +SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); -static void sysfs_create_dscr_default(void) -{ - if (cpu_has_feature(CPU_FTR_DSCR)) { - int err = 0; - int cpu; +static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +#endif /* HAS_PPC_PMC56 */ - dscr_default = spr_default_dscr; - for_each_possible_cpu(cpu) - paca_ptrs[cpu]->dscr_default = dscr_default; - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); - } -} -#endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0); @@ -612,7 +633,9 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2); SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3); SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5); 
-#ifdef CONFIG_DEBUG_MISC +#endif + +#ifdef HAS_PPC_PA6T SYSFS_SPRSETUP(hid0, SPRN_HID0); SYSFS_SPRSETUP(hid1, SPRN_HID1); SYSFS_SPRSETUP(hid4, SPRN_HID4); @@ -641,15 +664,14 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0); SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1); SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2); SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3); -#endif /* CONFIG_DEBUG_MISC */ -#endif /* HAS_PPC_PMC_PA6T */ +#endif /* HAS_PPC_PA6T */ #ifdef HAS_PPC_PMC_IBM static struct device_attribute ibm_common_attrs[] = { __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), }; -#endif /* HAS_PPC_PMC_G4 */ +#endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 static struct device_attribute g4_common_attrs[] = { @@ -659,6 +681,7 @@ static struct device_attribute g4_common_attrs[] = { }; #endif /* HAS_PPC_PMC_G4 */ +#ifdef HAS_PPC_PMC_CLASSIC static struct device_attribute classic_pmc_attrs[] = { __ATTR(pmc1, 0600, show_pmc1, store_pmc1), __ATTR(pmc2, 0600, show_pmc2, store_pmc2), @@ -666,14 +689,16 @@ static struct device_attribute classic_pmc_attrs[] = { __ATTR(pmc4, 0600, show_pmc4, store_pmc4), __ATTR(pmc5, 0600, show_pmc5, store_pmc5), __ATTR(pmc6, 0600, show_pmc6, store_pmc6), -#ifdef CONFIG_PPC64 +#ifdef HAS_PPC_PMC56 __ATTR(pmc7, 0600, show_pmc7, store_pmc7), __ATTR(pmc8, 0600, show_pmc8, store_pmc8), #endif }; +#endif -#ifdef HAS_PPC_PMC_PA6T +#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T) static struct device_attribute pa6t_attrs[] = { +#ifdef HAS_PPC_PMC_PA6T __ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0), __ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1), __ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0), @@ -682,7 +707,8 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), -#ifdef CONFIG_DEBUG_MISC +#endif +#ifdef HAS_PPC_PA6T __ATTR(hid0, 0600, show_hid0, store_hid0), __ATTR(hid1, 0600, 
show_hid1, store_hid1), __ATTR(hid4, 0600, show_hid4, store_hid4), @@ -711,10 +737,9 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(tsr1, 0600, show_tsr1, store_tsr1), __ATTR(tsr2, 0600, show_tsr2, store_tsr2), __ATTR(tsr3, 0600, show_tsr3, store_tsr3), -#endif /* CONFIG_DEBUG_MISC */ +#endif /* HAS_PPC_PA6T */ }; -#endif /* HAS_PPC_PMC_PA6T */ -#endif /* HAS_PPC_PMC_CLASSIC */ +#endif #ifdef CONFIG_PPC_SVM static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf) @@ -765,14 +790,14 @@ static int register_cpu_online(unsigned int cpu) pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ -#ifdef HAS_PPC_PMC_PA6T +#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; -#endif /* HAS_PPC_PMC_PA6T */ +#endif default: attrs = NULL; nattrs = 0; @@ -854,14 +879,14 @@ static int unregister_cpu_online(unsigned int cpu) pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ -#ifdef HAS_PPC_PMC_PA6T +#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); pmc_attrs = NULL; break; -#endif /* HAS_PPC_PMC_PA6T */ +#endif default: attrs = NULL; nattrs = 0; -- cgit v1.2.3-59-g8ed1b From 22697da36d0cee57c2a5750ef7d84e4d88da17e7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 14 Feb 2020 13:36:06 +0530 Subject: powerpc/kernel/sysfs: Add new config option PMU_SYSFS to enable PMU SPRs sysfs file creation Many of the performance monitoring unit (PMU) SPRs are exposed in the sysfs. This may not be desirable, since "perf" API is the primary interface to program PMU and collect counter data in the system. But that said, we can't remove these sysfs files since we don't know whether anyone/anything is using them.
So the patch adds a new CONFIG option 'CONFIG_PMU_SYSFS' (user selectable) to be used in sysfs file creation for PMU SPRs. New option by default is disabled, but can be enabled if user needs it. Tested this patch behaviour in powernv and pseries machines. Patch is also tested for pmac32_defconfig. Signed-off-by: Kajol Jain Tested-by: Nageswara R Sastry Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200214080606.26872-2-kjain@linux.ibm.com --- arch/powerpc/kernel/sysfs.c | 6 ++++++ arch/powerpc/platforms/Kconfig.cputype | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 74da5ebf088e..479c70680b76 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -562,6 +562,7 @@ EXPORT_SYMBOL(ppc_enable_pmcs); * that are implemented on the current processor */ +#ifdef CONFIG_PMU_SYSFS #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32) #define HAS_PPC_PMC_CLASSIC 1 #define HAS_PPC_PMC_IBM 1 @@ -575,6 +576,7 @@ EXPORT_SYMBOL(ppc_enable_pmcs); #ifdef CONFIG_PPC_BOOK3S_32 #define HAS_PPC_PMC_G4 1 #endif +#endif /* CONFIG_PMU_SYSFS */ #if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC) #define HAS_PPC_PA6T @@ -812,8 +814,10 @@ static int register_cpu_online(unsigned int cpu) device_create_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 +#ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); +#endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { if (!firmware_has_feature(FW_FEATURE_LPAR)) @@ -901,8 +905,10 @@ static int unregister_cpu_online(unsigned int cpu) device_remove_file(s, &pmc_attrs[i]); #ifdef CONFIG_PPC64 +#ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_remove_file(s, &dev_attr_mmcra); +#endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) device_remove_file(s, &dev_attr_purr); diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype index 6caedc88474f..4208724e9f28 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -425,6 +425,12 @@ config PPC_MM_SLICES config PPC_HAVE_PMU_SUPPORT bool +config PMU_SYSFS + bool "Create PMU SPRs sysfs file" + default n + help + This option enables sysfs file creation for PMU SPRs like MMCR* and PMC*. + config PPC_PERF_CTRS def_bool y depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT -- cgit v1.2.3-59-g8ed1b From 3d13e839e801e081bdece0127c2affa33d0f77cf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Feb 2020 22:51:37 +1100 Subject: powerpc: Rename current_stack_pointer() to current_stack_frame() current_stack_pointer(), which was called __get_SP(), used to just return the value in r1. But that caused problems in some cases, so it was turned into a function in commit bfe9a2cfe91a ("powerpc: Reimplement __get_SP() as a function not a define"). Because it's a function in a separate compilation unit to all its callers, it has the effect of causing a stack frame to be created, and then returns the address of that frame. This is good in some cases like those described in the above commit, but in other cases it's overkill, we just need to know what stack page we're on. On some other arches current_stack_pointer is just a register global giving the stack pointer, and we'd like to do that too. So rename our current_stack_pointer() to current_stack_frame() to make that possible. 
Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20200220115141.2707-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/perf_event.h | 2 +- arch/powerpc/include/asm/reg.h | 2 +- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/misc.S | 4 ++-- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/stacktrace.c | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 7426d7a90e1e..eed3954082fa 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -32,7 +32,7 @@ do { \ (regs)->result = 0; \ (regs)->nip = __ip; \ - (regs)->gpr[1] = current_stack_pointer(); \ + (regs)->gpr[1] = current_stack_frame(); \ asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \ } while (0) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1aa46dff0957..1b1ffdba6097 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1448,7 +1448,7 @@ static inline void mtsrin(u32 val, u32 idx) #define proc_trap() asm volatile("trap") -extern unsigned long current_stack_pointer(void); +extern unsigned long current_stack_frame(void); extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c9b11878555..02118c18434d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -602,7 +602,7 @@ static inline void check_stack_overflow(void) #ifdef CONFIG_DEBUG_STACKOVERFLOW long sp; - sp = current_stack_pointer() & (THREAD_SIZE-1); + sp = current_stack_frame() & (THREAD_SIZE-1); /* check for stack overflow: is there less than 2KB free? 
*/ if (unlikely(sp < 2048)) { @@ -647,7 +647,7 @@ void do_IRQ(struct pt_regs *regs) void *cursp, *irqsp, *sirqsp; /* Switch to the irq stack to handle this */ - cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + cursp = (void *)(current_stack_frame() & ~(THREAD_SIZE - 1)); irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 974f65f79a8e..65f9f731c229 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -110,7 +110,7 @@ _GLOBAL(longjmp) li r3, 1 blr -_GLOBAL(current_stack_pointer) +_GLOBAL(current_stack_frame) PPC_LL r3,0(r1) blr -EXPORT_SYMBOL(current_stack_pointer) +EXPORT_SYMBOL(current_stack_frame) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e730b8e522b0..110db94cdf3c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2051,7 +2051,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) sp = (unsigned long) stack; if (sp == 0) { if (tsk == current) - sp = current_stack_pointer(); + sp = current_stack_frame(); else sp = tsk->thread.ksp; } diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index e2a46cfed5fd..c477b8585a29 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -57,7 +57,7 @@ void save_stack_trace(struct stack_trace *trace) { unsigned long sp; - sp = current_stack_pointer(); + sp = current_stack_frame(); save_context_stack(trace, sp, current, 1); } @@ -71,7 +71,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) return; if (tsk == current) - sp = current_stack_pointer(); + sp = current_stack_frame(); else sp = tsk->thread.ksp; @@ -131,7 +131,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, } if (tsk == current) - sp = current_stack_pointer(); + sp = current_stack_frame(); else sp = tsk->thread.ksp; -- cgit 
v1.2.3-59-g8ed1b From 0e63f0151719ee4cb90d85e60c98045099c995e2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 Feb 2020 22:51:38 +1100 Subject: powerpc: Add current_stack_pointer as a register global current_stack_frame() doesn't return the stack pointer, but the caller's stack frame. See commit bfe9a2cfe91a ("powerpc: Reimplement __get_SP() as a function not a define") and commit acf620ecf56c ("powerpc: Rename __get_SP() to current_stack_pointer()") for details. In some cases this is overkill or incorrect, as it doesn't return the current value of r1. So add a current_stack_pointer register global to get the value of r1 directly. Signed-off-by: Christophe Leroy [mpe: Split out of other patch, tweak change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200220115141.2707-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/reg.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1b1ffdba6097..da5cab038e25 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1450,6 +1450,8 @@ static inline void mtsrin(u32 val, u32 idx) extern unsigned long current_stack_frame(void); +register unsigned long current_stack_pointer asm("r1"); + extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); -- cgit v1.2.3-59-g8ed1b From 84ab14893054751a2b3adba725834183b872a17f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 Feb 2020 22:51:39 +1100 Subject: powerpc/irq: Use current_stack_pointer in check_stack_overflow() The purpose of check_stack_overflow() is to verify that the stack has not overflowed. To really know whether the stack pointer is still within boundaries, the check must be done directly on the value of r1. 
So use current_stack_pointer, which returns the current value of r1, rather than current_stack_frame() which causes a frame to be created and then returns that value. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200220115141.2707-3-mpe@ellerman.id.au --- arch/powerpc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 02118c18434d..c7d6f5cdffdb 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -602,7 +602,7 @@ static inline void check_stack_overflow(void) #ifdef CONFIG_DEBUG_STACKOVERFLOW long sp; - sp = current_stack_frame() & (THREAD_SIZE-1); + sp = current_stack_pointer & (THREAD_SIZE - 1); /* check for stack overflow: is there less than 2KB free? */ if (unlikely(sp < 2048)) { -- cgit v1.2.3-59-g8ed1b From 0dec6e1cca7eee6d5616f982efab2ff3b3ea1f9f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 Feb 2020 22:51:40 +1100 Subject: powerpc/irq: use IS_ENABLED() in check_stack_overflow() Instead of #ifdef, use IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW). This enables GCC to check for code validity even when the option is not selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200220115141.2707-4-mpe@ellerman.id.au --- arch/powerpc/kernel/irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c7d6f5cdffdb..46d5852fb00a 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -599,9 +599,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) static inline void check_stack_overflow(void) { -#ifdef CONFIG_DEBUG_STACKOVERFLOW long sp; + if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) + return; + sp = current_stack_pointer & (THREAD_SIZE - 1); /* check for stack overflow: is there less than 2KB free?
*/ @@ -609,7 +611,6 @@ static inline void check_stack_overflow(void) pr_err("do_IRQ: stack overflow: %ld\n", sp); dump_stack(); } -#endif } void __do_irq(struct pt_regs *regs) -- cgit v1.2.3-59-g8ed1b From 532d43a73cf191cf951d27e49d8d0a03e8cf2297 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 20 Feb 2020 22:51:41 +1100 Subject: powerpc/irq: Use current_stack_pointer in do_IRQ() Until commit 7306e83ccf5c ("powerpc: Don't use CURRENT_THREAD_INFO to find the stack"), the current stack base address was obtained by calling current_thread_info(). That inline function was simply masking out the value of r1. In that commit, it was changed to using current_stack_pointer() (since renamed current_stack_frame()), which is a heavier function as it is an outline assembly function which cannot be inlined and which reads the content of the stack at 0(r1). Convert to using current_stack_pointer for getting r1 and masking out its value to obtain the base address of the stack pointer as before.
Fixes: 7306e83ccf5c ("powerpc: Don't use CURRENT_THREAD_INFO to find the stack") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200220115141.2707-5-mpe@ellerman.id.au --- arch/powerpc/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 46d5852fb00a..1bed18b7229e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -648,7 +648,7 @@ void do_IRQ(struct pt_regs *regs) void *cursp, *irqsp, *sirqsp; /* Switch to the irq stack to handle this */ - cursp = (void *)(current_stack_frame() & ~(THREAD_SIZE - 1)); + cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; -- cgit v1.2.3-59-g8ed1b From a05f0e5be4e81e4977d3f92aaf7688ee0cb7d5db Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 29 Jan 2020 19:21:21 +0530 Subject: powerpc/smp: Use nid as fallback for package_id package_id is to match cores that are part of the same chip. On PowerNV machines, package_id defaults to chip_id. However ibm,chip_id property is not present in device-tree of PowerVM LPARs. Hence lscpu output shows one core per socket and multiple cores. To overcome this, use nid as the package_id on PowerVM LPARs. 
Before the patch: Architecture: ppc64le Byte Order: Little Endian CPU(s): 128 On-line CPU(s) list: 0-127 Thread(s) per core: 8 Core(s) per socket: 1 <---------------------- Socket(s): 16 <---------------------- NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K L2 cache: 512K L3 cache: 10240K NUMA node0 CPU(s): 0-63 NUMA node1 CPU(s): 64-127 # # cat /sys/devices/system/cpu/cpu0/topology/physical_package_id -1 After the patch: Architecture: ppc64le Byte Order: Little Endian CPU(s): 128 On-line CPU(s) list: 0-127 Thread(s) per core: 8 <--------------------- Core(s) per socket: 8 <--------------------- Socket(s): 2 NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K L2 cache: 512K L3 cache: 10240K NUMA node0 CPU(s): 0-63 NUMA node1 CPU(s): 64-127 # # cat /sys/devices/system/cpu/cpu0/topology/physical_package_id 0 Now lscpu output is more in line with the system configuration. 
Signed-off-by: Srikar Dronamraju [mpe: Use pkg_id instead of ppid, tweak change log and comment] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200129135121.24617-1-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 6 ++++++ arch/powerpc/kernel/smp.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 2f7e1ea5089e..e2e1ccd4a18d 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -134,7 +134,13 @@ static inline void shared_proc_topology_init(void) {} #ifdef CONFIG_PPC64 #include +#ifdef CONFIG_PPC_SPLPAR +int get_physical_package_id(int cpu); +#define topology_physical_package_id(cpu) (get_physical_package_id(cpu)) +#else #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) +#endif + #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ea6adbf6a221..f68cde82bdf3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1185,10 +1185,34 @@ static inline void add_cpu_to_smallcore_masks(int cpu) } } +int get_physical_package_id(int cpu) +{ + int pkg_id = cpu_to_chip_id(cpu); + +#ifdef CONFIG_PPC_SPLPAR + /* + * If the platform is PowerNV or Guest on KVM, ibm,chip-id is + * defined. Hence we would return the chip-id as the result of + * get_physical_package_id. 
+ */ + if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR)) { + struct device_node *np = of_get_cpu_node(cpu, NULL); + + if (np) { + pkg_id = of_node_to_nid(np); + of_node_put(np); + } + } +#endif /* CONFIG_PPC_SPLPAR */ + + return pkg_id; +} +EXPORT_SYMBOL_GPL(get_physical_package_id); + static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); - int chipid = cpu_to_chip_id(cpu); + int pkg_id = get_physical_package_id(cpu); int i; /* @@ -1217,11 +1241,11 @@ static void add_cpu_to_masks(int cpu) for_each_cpu(i, cpu_l2_cache_mask(cpu)) set_cpus_related(cpu, i, cpu_core_mask); - if (chipid == -1) + if (pkg_id == -1) return; for_each_cpu(i, cpu_online_mask) - if (cpu_to_chip_id(i) == chipid) + if (get_physical_package_id(i) == pkg_id) set_cpus_related(cpu, i, cpu_core_mask); } -- cgit v1.2.3-59-g8ed1b From e7214ae9d85aa325c9f3cb34bf4fad7f112861d7 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 29 Jan 2020 19:22:57 +0530 Subject: powerpc/vphn: Check for error from hcall_vphn There is no value in unpacking associativity, if H_HOME_NODE_ASSOCIATIVITY hcall has returned an error. 
Signed-off-by: Srikar Dronamraju Reported-by: Abdul Haleem Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200129135301.24739-2-srikar@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/vphn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c index 3f07bf6c670e..cca474a2c396 100644 --- a/arch/powerpc/platforms/pseries/vphn.c +++ b/arch/powerpc/platforms/pseries/vphn.c @@ -82,7 +82,8 @@ long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu); - vphn_unpack_associativity(retbuf, associativity); + if (rc == H_SUCCESS) + vphn_unpack_associativity(retbuf, associativity); return rc; } -- cgit v1.2.3-59-g8ed1b From 76b7bfb1732d139dc20a2f0c19ec32e09c8891cf Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 29 Jan 2020 19:22:58 +0530 Subject: powerpc/numa: Handle extra hcall_vphn error cases Currently code handles H_FUNCTION, H_SUCCESS, H_HARDWARE return codes. However hcall_vphn can return other return codes. Now it also handles H_PARAMETER return code. Also the rest return codes are handled under the default case. Signed-off-by: Srikar Dronamraju Reported-by: Abdul Haleem Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200129135301.24739-3-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3c7dec70cda0..2cb87c9a0544 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1191,23 +1191,30 @@ static long vphn_get_associativity(unsigned long cpu, VPHN_FLAG_VCPU, associativity); switch (rc) { + case H_SUCCESS: + dbg("VPHN hcall succeeded. 
Reset polling...\n"); + timed_topology_update(0); + goto out; + case H_FUNCTION: - printk_once(KERN_INFO - "VPHN is not supported. Disabling polling...\n"); - stop_topology_update(); + pr_err_ratelimited("VPHN unsupported. Disabling polling...\n"); break; case H_HARDWARE: - printk(KERN_ERR - "hcall_vphn() experienced a hardware fault " + pr_err_ratelimited("hcall_vphn() experienced a hardware fault " "preventing VPHN. Disabling polling...\n"); - stop_topology_update(); break; - case H_SUCCESS: - dbg("VPHN hcall succeeded. Reset polling...\n"); - timed_topology_update(0); + case H_PARAMETER: + pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. " + "Disabling polling...\n"); + break; + default: + pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n" + , rc); break; } + stop_topology_update(); +out: return rc; } -- cgit v1.2.3-59-g8ed1b From 413e40550c5cfdba7e062aa6350a0d2448014519 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 29 Jan 2020 19:22:59 +0530 Subject: powerpc/numa: Use cpu node map of first sibling thread All the sibling threads of a core have to be part of the same node. To ensure that all the sibling threads map to the same node, always lookup/update the cpu-to-node map of the first thread in the core. 
Signed-off-by: Srikar Dronamraju Reported-by: Abdul Haleem Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200129135301.24739-4-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2cb87c9a0544..518c6dbbccbe 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -467,15 +467,20 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) */ static int numa_setup_cpu(unsigned long lcpu) { - int nid = NUMA_NO_NODE; struct device_node *cpu; + int fcpu = cpu_first_thread_sibling(lcpu); + int nid = NUMA_NO_NODE; /* * If a valid cpu-to-node mapping is already available, use it * directly instead of querying the firmware, since it represents * the most recent mapping notified to us by the platform (eg: VPHN). + * Since cpu_to_node binding remains the same for all threads in the + * core. If a valid cpu-to-node mapping is already available, for + * the first thread in the core, use it. */ - if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { + nid = numa_cpu_lookup_table[fcpu]; + if (nid >= 0) { map_cpu_to_node(lcpu, nid); return nid; } @@ -496,6 +501,19 @@ out_present: if (nid < 0 || !node_possible(nid)) nid = first_online_node; + /* + * Update for the first thread of the core. All threads of a core + * have to be part of the same node. This not only avoids querying + * for every other thread in the core, but always avoids a case + * where virtual node associativity change causes subsequent threads + * of a core to be associated with different nid. However if first + * thread is already online, expect it to have a valid mapping. 
+ */ + if (fcpu != lcpu) { + WARN_ON(cpu_online(fcpu)); + map_cpu_to_node(fcpu, nid); + } + map_cpu_to_node(lcpu, nid); of_node_put(cpu); out: -- cgit v1.2.3-59-g8ed1b From dc909d8b0c9c0d2c42dc1cf34216c4830f639f7b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 29 Jan 2020 19:23:00 +0530 Subject: powerpc/numa: Early request for home node associativity Currently the kernel detects if its running on a shared lpar platform and requests home node associativity before the scheduler sched_domains are setup. However between the time NUMA setup is initialized and the request for home node associativity, workqueue initializes its per node cpumask. The per node workqueue possible cpumask may turn invalid after home node associativity resulting in weird situations like workqueue possible cpumask being a subset of workqueue online cpumask. This can be fixed by requesting home node associativity earlier just before NUMA setup. However at the NUMA setup time, kernel may not be in a position to detect if its running on a shared lpar platform. So request for home node associativity and if the request fails, fallback on the device tree property. Signed-off-by: Srikar Dronamraju Reported-by: Abdul Haleem Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200129135301.24739-5-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 518c6dbbccbe..5a8abf0165d7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -461,6 +461,41 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) return nid; } +#ifdef CONFIG_PPC_SPLPAR +static int vphn_get_nid(long lcpu) +{ + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + long rc, hwid; + + /* + * On a shared lpar, device tree will not have node associativity. + * At this time lppaca, or its __old_status field may not be + * updated. 
Hence kernel cannot detect if its on a shared lpar. So + * request an explicit associativity irrespective of whether the + * lpar is shared or dedicated. Use the device tree property as a + * fallback. cpu_to_phys_id is only valid between + * smp_setup_cpu_maps() and smp_setup_pacas(). + */ + if (firmware_has_feature(FW_FEATURE_VPHN)) { + if (cpu_to_phys_id) + hwid = cpu_to_phys_id[lcpu]; + else + hwid = get_hard_smp_processor_id(lcpu); + + rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity); + if (rc == H_SUCCESS) + return associativity_to_nid(associativity); + } + + return NUMA_NO_NODE; +} +#else +static int vphn_get_nid(long unused) +{ + return NUMA_NO_NODE; +} +#endif /* CONFIG_PPC_SPLPAR */ + /* * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. @@ -485,6 +520,10 @@ static int numa_setup_cpu(unsigned long lcpu) return nid; } + nid = vphn_get_nid(lcpu); + if (nid != NUMA_NO_NODE) + goto out_present; + cpu = of_get_cpu_node(lcpu, NULL); if (!cpu) { @@ -496,6 +535,7 @@ static int numa_setup_cpu(unsigned long lcpu) } nid = of_node_to_nid_single(cpu); + of_node_put(cpu); out_present: if (nid < 0 || !node_possible(nid)) @@ -515,7 +555,6 @@ out_present: } map_cpu_to_node(lcpu, nid); - of_node_put(cpu); out: return nid; } -- cgit v1.2.3-59-g8ed1b From 247257b03b04398ca07da4bce3d17bee25d623cb Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 29 Jan 2020 19:23:01 +0530 Subject: powerpc/numa: Remove late request for home node associativity With commit ("powerpc/numa: Early request for home node associativity"), commit 2ea626306810 ("powerpc/topology: Get topology for shared processors at boot") which was requesting home node associativity becomes redundant. Hence remove the late request for home node associativity. 
Signed-off-by: Srikar Dronamraju Reported-by: Abdul Haleem Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200129135301.24739-6-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 4 ---- arch/powerpc/kernel/smp.c | 5 ----- arch/powerpc/mm/numa.c | 9 --------- 3 files changed, 18 deletions(-) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index e2e1ccd4a18d..2db7ba789720 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -98,7 +98,6 @@ extern int stop_topology_update(void); extern int prrn_is_enabled(void); extern int find_and_online_cpu_nid(int cpu); extern int timed_topology_update(int nsecs); -extern void __init shared_proc_topology_init(void); #else static inline int start_topology_update(void) { @@ -121,9 +120,6 @@ static inline int timed_topology_update(int nsecs) return 0; } -#ifdef CONFIG_SMP -static inline void shared_proc_topology_init(void) {} -#endif #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f68cde82bdf3..37c12e3bab9e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1383,11 +1383,6 @@ void __init smp_cpus_done(unsigned int max_cpus) if (smp_ops && smp_ops->bringup_done) smp_ops->bringup_done(); - /* - * On a shared LPAR, associativity needs to be requested. 
- * Hence, get numa topology before dumping cpu topology - */ - shared_proc_topology_init(); dump_numa_cpu_topology(); #ifdef CONFIG_SCHED_SMT diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5a8abf0165d7..9fcf2d195830 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1632,15 +1632,6 @@ int prrn_is_enabled(void) return prrn_enabled; } -void __init shared_proc_topology_init(void) -{ - if (lppaca_shared_proc(get_lppaca())) { - bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), - nr_cpumask_bits); - numa_update_cpu_topology(false); - } -} - static int topology_read(struct seq_file *file, void *v) { if (vphn_enabled || prrn_enabled) -- cgit v1.2.3-59-g8ed1b From cc6f0e39000900e5dd1448103a9571f0eccd7d4e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 7 Mar 2020 10:09:15 +0000 Subject: powerpc/32: Fix missing NULL pmd check in virt_to_kpte() Commit 2efc7c085f05 ("powerpc/32: drop get_pteptr()"), replaced get_pteptr() by virt_to_kpte(). But virt_to_kpte() lacks a NULL pmd check and returns an invalid non NULL pointer when there is no page table. Reported-by: Nick Desaulniers Fixes: 2efc7c085f05 ("powerpc/32: drop get_pteptr()") Signed-off-by: Christophe Leroy Tested-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b1177cdfc6af74a3e277bba5d9e708c4b3315ebe.1583575707.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/pgtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index b80bfd41828d..b1f1d5339735 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -54,7 +54,9 @@ static inline pmd_t *pmd_ptr_k(unsigned long va) static inline pte_t *virt_to_kpte(unsigned long vaddr) { - return pte_offset_kernel(pmd_ptr_k(vaddr), vaddr); + pmd_t *pmd = pmd_ptr_k(vaddr); + + return pmd_none(*pmd) ? 
NULL : pte_offset_kernel(pmd, vaddr); } #endif -- cgit v1.2.3-59-g8ed1b From ffd3eaf178b0f616a071e510e289d937330b0b35 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Mon, 24 Feb 2020 16:18:48 -0500 Subject: powerpc/vdso: remove deprecated VDS64_HAS_DESCRIPTORS references The original 2005 patch that introduced the powerpc vdso, pre-git ("ppc64: Implement a vDSO and use it for signal trampoline") notes that: ... symbols exposed by the vDSO aren't "normal" function symbols, apps can't be expected to link against them directly, the vDSO's are both seen as if they were linked at 0 and the symbols just contain offsets to the various functions. This is done on purpose to avoid a relocation step (ppc64 functions normally have descriptors with abs addresses in them). When glibc uses those functions, it's expected to use it's own trampolines that know how to reach them. Despite that explanation, there remains dead #ifdef VDS64_HAS_DESCRIPTORS code-blocks that provide alternate function definitions that setup function descriptors. Since VDS64_HAS_DESCRIPTORS has been unused for all these years, we might as well finally remove it from the codebase. 
Signed-off-by: Joe Lawrence Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200224211848.26087-1-joe.lawrence@redhat.com --- arch/powerpc/include/asm/vdso.h | 24 ------------------------ arch/powerpc/kernel/vdso.c | 5 ----- 2 files changed, 29 deletions(-) diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index b5e1f8f8a05c..2ff884853f97 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -13,9 +13,6 @@ #define VDSO_VERSION_STRING LINUX_2.6.15 -/* Define if 64 bits VDSO has procedure descriptors */ -#undef VDS64_HAS_DESCRIPTORS - #ifndef __ASSEMBLY__ /* Offsets relative to thread->vdso_base */ @@ -28,25 +25,6 @@ int vdso_getcpu_init(void); #else /* __ASSEMBLY__ */ #ifdef __VDSO64__ -#ifdef VDS64_HAS_DESCRIPTORS -#define V_FUNCTION_BEGIN(name) \ - .globl name; \ - .section ".opd","a"; \ - .align 3; \ - name: \ - .quad .name,.TOC.@tocbase,0; \ - .previous; \ - .globl .name; \ - .type .name,@function; \ - .name: \ - -#define V_FUNCTION_END(name) \ - .size .name,.-.name; - -#define V_LOCAL_FUNC(name) (.name) - -#else /* VDS64_HAS_DESCRIPTORS */ - #define V_FUNCTION_BEGIN(name) \ .globl name; \ name: \ @@ -55,8 +33,6 @@ int vdso_getcpu_init(void); .size name,.-name; #define V_LOCAL_FUNC(name) (name) - -#endif /* VDS64_HAS_DESCRIPTORS */ #endif /* __VDSO64__ */ #ifdef __VDSO32__ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b9a108411c0d..d3b77c15f9ce 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -391,12 +391,7 @@ static unsigned long __init find_function64(struct lib64_elfinfo *lib, symname); return 0; } -#ifdef VDS64_HAS_DESCRIPTORS - return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - - VDSO64_LBASE; -#else return sym->st_value - VDSO64_LBASE; -#endif } static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, -- cgit v1.2.3-59-g8ed1b From d0a72efac89d1c35ac55197895201b7b94c5e6ef Mon Sep 17 00:00:00 2001 From: 
Oliver O'Halloran Date: Thu, 6 Feb 2020 17:26:21 +1100 Subject: cpufreq: powernv: Fix use-after-free The cpufreq driver has a use-after-free that we can hit if: a) There's an OCC message pending when the notifier is registered, and b) The cpufreq driver fails to register with the core. When a) occurs the notifier schedules a workqueue item to handle the message. The backing work_struct is located on chips[].throttle and when b) happens we clean up by freeing the array. Once we get to the (now free) queued item, the kernel crashes. Fixes: c5e29ea7ac14 ("cpufreq: powernv: Fix bugs in powernv_cpufreq_{init/exit}") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Oliver O'Halloran Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200206062622.28235-1-oohall@gmail.com --- drivers/cpufreq/powernv-cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 56f4bc0d209e..1806b1da4366 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1080,6 +1080,12 @@ free_and_return: static inline void clean_chip_info(void) { + int i; + + /* flush any pending work items */ + if (chips) + for (i = 0; i < nr_chips; i++) + cancel_work_sync(&chips[i].throttle); kfree(chips); } -- cgit v1.2.3-59-g8ed1b From 966c08de7c2c9bcac13e2cb9e769a39582d5389f Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 6 Feb 2020 17:26:22 +1100 Subject: cpufreq: powernv: Fix unsafe notifiers The PowerNV cpufreq driver registers two notifiers: one to catch throttle messages from the OCC and one to bump the CPU frequency back to normal before a reboot. Both require the cpufreq driver to be registered in order to function since the notifier callbacks use various cpufreq_*() functions. Right now we register both notifiers before we've initialised the driver.
This seems to work, but we should head off any potential problems by registering the notifiers after the driver is initialised. Signed-off-by: Oliver O'Halloran Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200206062622.28235-2-oohall@gmail.com --- drivers/cpufreq/powernv-cpufreq.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 1806b1da4366..03798c4326c6 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1114,9 +1114,6 @@ static int __init powernv_cpufreq_init(void) if (rc) goto out; - register_reboot_notifier(&powernv_cpufreq_reboot_nb); - opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); - if (powernv_pstate_info.wof_enabled) powernv_cpufreq_driver.boost_enabled = true; else @@ -1125,15 +1122,17 @@ static int __init powernv_cpufreq_init(void) rc = cpufreq_register_driver(&powernv_cpufreq_driver); if (rc) { pr_info("Failed to register the cpufreq driver (%d)\n", rc); - goto cleanup_notifiers; + goto cleanup; } if (powernv_pstate_info.wof_enabled) cpufreq_enable_boost_support(); + register_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); + return 0; -cleanup_notifiers: - unregister_all_notifiers(); +cleanup: clean_chip_info(); out: pr_info("Platform driver disabled. System does not support PState control\n"); -- cgit v1.2.3-59-g8ed1b From 3670664b5da555a2a481449b3baafff113b0ac35 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 9 Jan 2020 18:39:12 +1100 Subject: tty: evh_bytechan: Fix out of bounds accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ev_byte_channel_send() assumes that its third argument is a 16 byte array. Some places where it is called it may not be (or we can't easily tell if it is).
Newer compilers have started producing warnings about this, so make sure we actually pass a 16 byte array. There may be more elegant solutions to this, but the driver is quite old and hasn't been updated in many years. The warnings (from a powerpc allyesconfig build) are: In file included from include/linux/byteorder/big_endian.h:5, from arch/powerpc/include/uapi/asm/byteorder.h:14, from include/asm-generic/bitops/le.h:6, from arch/powerpc/include/asm/bitops.h:250, from include/linux/bitops.h:29, from include/linux/kernel.h:12, from include/asm-generic/bug.h:19, from arch/powerpc/include/asm/bug.h:109, from include/linux/bug.h:5, from include/linux/mmdebug.h:5, from include/linux/gfp.h:5, from include/linux/slab.h:15, from drivers/tty/ehv_bytechan.c:24: drivers/tty/ehv_bytechan.c: In function ‘ehv_bc_udbg_putc’: arch/powerpc/include/asm/epapr_hcalls.h:298:20: warning: array subscript 1 is outside array bounds of ‘const char[1]’ [-Warray-bounds] 298 | r6 = be32_to_cpu(p[1]); include/uapi/linux/byteorder/big_endian.h:40:51: note: in definition of macro ‘__be32_to_cpu’ 40 | #define __be32_to_cpu(x) ((__force __u32)(__be32)(x)) | ^ arch/powerpc/include/asm/epapr_hcalls.h:298:7: note: in expansion of macro ‘be32_to_cpu’ 298 | r6 = be32_to_cpu(p[1]); | ^~~~~~~~~~~ drivers/tty/ehv_bytechan.c:166:13: note: while referencing ‘data’ 166 | static void ehv_bc_udbg_putc(char c) | ^~~~~~~~~~~~~~~~ Fixes: dcd83aaff1c8 ("tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver") Signed-off-by: Stephen Rothwell Tested-by: Laurentiu Tudor [mpe: Trim warnings from change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200109183912.5fcb52aa@canb.auug.org.au --- drivers/tty/ehv_bytechan.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index 769e0a5d1dfc..3c6dd06ec5fb 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -136,6 
+136,21 @@ static int find_console_handle(void) return 1; } +static unsigned int local_ev_byte_channel_send(unsigned int handle, + unsigned int *count, + const char *p) +{ + char buffer[EV_BYTE_CHANNEL_MAX_BYTES]; + unsigned int c = *count; + + if (c < sizeof(buffer)) { + memcpy(buffer, p, c); + memset(&buffer[c], 0, sizeof(buffer) - c); + p = buffer; + } + return ev_byte_channel_send(handle, count, p); +} + /*************************** EARLY CONSOLE DRIVER ***************************/ #ifdef CONFIG_PPC_EARLY_DEBUG_EHV_BC @@ -154,7 +169,7 @@ static void byte_channel_spin_send(const char data) do { count = 1; - ret = ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, + ret = local_ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE, &count, &data); } while (ret == EV_EAGAIN); } @@ -221,7 +236,7 @@ static int ehv_bc_console_byte_channel_send(unsigned int handle, const char *s, while (count) { len = min_t(unsigned int, count, EV_BYTE_CHANNEL_MAX_BYTES); do { - ret = ev_byte_channel_send(handle, &len, s); + ret = local_ev_byte_channel_send(handle, &len, s); } while (ret == EV_EAGAIN); count -= len; s += len; @@ -401,7 +416,7 @@ static void ehv_bc_tx_dequeue(struct ehv_bc_data *bc) CIRC_CNT_TO_END(bc->head, bc->tail, BUF_SIZE), EV_BYTE_CHANNEL_MAX_BYTES); - ret = ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); + ret = local_ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail); /* 'len' is valid only if the return code is 0 or EV_EAGAIN */ if (!ret || (ret == EV_EAGAIN)) -- cgit v1.2.3-59-g8ed1b From aa4113340ae6c2811e046f08c2bc21011d20a072 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 23 Jan 2020 11:19:25 +0000 Subject: powerpc/fsl_booke: Avoid creating duplicate tlb1 entry In the current implementation, the call to loadcam_multi() is wrapped between switch_to_as1() and restore_to_as0() calls so, when it tries to create its own temporary AS=1 TLB1 entry, it ends up duplicating the existing one created by switch_to_as1(). 
Add a check to skip creating the temporary entry if already running in AS=1. Fixes: d9e1831a4202 ("powerpc/85xx: Load all early TLB entries at once") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Laurentiu Tudor Acked-by: Scott Wood Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200123111914.2565-1-laurentiu.tudor@nxp.com --- arch/powerpc/mm/nohash/tlb_low.S | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 2ca407cedbe7..eaeee402f96e 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -397,7 +397,7 @@ _GLOBAL(set_context) * extern void loadcam_entry(unsigned int index) * * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, and r10 + * Must preserve r7, r8, r9, r10 and r11 */ _GLOBAL(loadcam_entry) mflr r5 @@ -433,6 +433,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) */ _GLOBAL(loadcam_multi) mflr r8 + /* Don't switch to AS=1 if already there */ + mfmsr r11 + andi. r11,r11,MSR_IS + bne 10f /* * Set up temporary TLB entry that is the same as what we're @@ -458,6 +462,7 @@ _GLOBAL(loadcam_multi) mtmsr r6 isync +10: mr r9,r3 add r10,r3,r4 2: bl loadcam_entry @@ -466,6 +471,10 @@ _GLOBAL(loadcam_multi) mr r3,r9 blt 2b + /* Don't return to AS=0 if we were in AS=1 at function start */ + andi. r11,r11,MSR_IS + bne 3f + /* Return to AS=0 and clear the temporary entry */ mfmsr r6 rlwinm. 
r6,r6,0,~(MSR_IS|MSR_DS) @@ -481,6 +490,7 @@ _GLOBAL(loadcam_multi) tlbwe isync +3: mtlr r8 blr #endif -- cgit v1.2.3-59-g8ed1b From 9451c79bc39e610882bdd12370f01af5004a3c4f Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Fri, 20 Sep 2019 18:39:51 +0300 Subject: powerpc/pmac/smp: Avoid unused-variable warnings When building with ppc64_defconfig, the compiler reports that these 2 variables are not used: warning: unused variable 'core99_l2_cache' [-Wunused-variable] warning: unused variable 'core99_l3_cache' [-Wunused-variable] They are only used when CONFIG_PPC64 is not defined. Move them into a section which does the same macro check. Reported-by: Nathan Chancellor Signed-off-by: Ilie Halip [mpe: Move them into core99_init_caches() which is their only user] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190920153951.25762-1-ilie.halip@gmail.com --- arch/powerpc/platforms/powermac/smp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index f95fbdee6efe..4a2a1b2529b3 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -660,13 +660,13 @@ static void smp_core99_gpio_tb_freeze(int freeze) #endif /* !CONFIG_PPC64 */ -/* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ -volatile static long int core99_l2_cache; -volatile static long int core99_l3_cache; - static void core99_init_caches(int cpu) { #ifndef CONFIG_PPC64 + /* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ + volatile static long int core99_l2_cache; + volatile static long int core99_l3_cache; + if (!cpu_has_feature(CPU_FTR_L2CR)) return; -- cgit v1.2.3-59-g8ed1b From a4037d1f1fc4e92b69d7196d4568c33078d465ea Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 3 Mar 2020 16:56:04 +0800 Subject: powerpc/pmac/smp: Drop unnecessary volatile qualifier core99_l2_cache/core99_l3_cache do not need to be marked as 
volatile, remove it. Signed-off-by: YueHaibing Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200303085604.24952-1-yuehaibing@huawei.com --- arch/powerpc/platforms/powermac/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 4a2a1b2529b3..d2900689d642 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -664,8 +664,8 @@ static void core99_init_caches(int cpu) { #ifndef CONFIG_PPC64 /* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ - volatile static long int core99_l2_cache; - volatile static long int core99_l3_cache; + static long int core99_l2_cache; + static long int core99_l3_cache; if (!cpu_has_feature(CPU_FTR_L2CR)) return; -- cgit v1.2.3-59-g8ed1b From 47bf235f324c696395c30541fe4fcf99fcd24188 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:00:09 +0000 Subject: selftests/powerpc: Add tlbie_test in .gitignore The commit identified below added tlbie_test but forgot to add it in .gitignore. 
Fixes: 93cad5f78995 ("selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/259f9c06ed4563c4fa4fa8ffa652347278d769e7.1582847784.git.christophe.leroy@c-s.fr --- tools/testing/selftests/powerpc/mm/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 0ebeaea22641..97f7922c52c5 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -6,3 +6,4 @@ segv_errors wild_bctr large_vm_fork_separation bad_accesses +tlbie_test -- cgit v1.2.3-59-g8ed1b From 993cfecc59e57de237ae27fadc84ed24efa87a4d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 2 Mar 2020 11:04:10 +1000 Subject: powerpc/64s/radix: Fix CONFIG_SMP=n build Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200302010410.2957362-1-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 1 + arch/powerpc/mm/book3s64/radix_tlb.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index dd1bea45325c..2a9a0cd79490 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 03f43c924e00..758ade2c2b6e 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -587,6 +587,11 @@ void radix__local_flush_all_mm(struct mm_struct *mm) preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_all_mm); + +static void __flush_all_mm(struct mm_struct *mm, bool fullmm) +{ + radix__local_flush_all_mm(mm); +} #endif /* 
CONFIG_SMP */ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, @@ -777,7 +782,7 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) EXPORT_SYMBOL(radix__flush_tlb_page); #else /* CONFIG_SMP */ -#define radix__flush_all_mm radix__local_flush_all_mm +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ static void do_tlbiel_kernel(void *info) -- cgit v1.2.3-59-g8ed1b From 59ed2adf393109c56d383e568f2e57bb5ad6d901 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 15:53:02 +1000 Subject: powerpc/lib: Fix emulate_step() std test We should be checking that the instruction was stepped *and* that the target register has the right value. Signed-off-by: Nicholas Piggin Reviewed-by: Ravi Bangoria [mpe: Write change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200226055302.1577954-1-npiggin@gmail.com --- arch/powerpc/lib/test_emulate_step.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 42347067739c..00d70253cb5b 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -160,7 +160,7 @@ static void __init test_std(void) /* std r5, 0(r3) */ stepped = emulate_step(&regs, TEST_STD(5, 3, 0)); - if (stepped == 1 || regs.gpr[5] == a) + if (stepped == 1 && regs.gpr[5] == a) show_result("std", "PASS"); else show_result("std", "FAIL"); -- cgit v1.2.3-59-g8ed1b From a0968a025c04702427a4aee2c618f451a5098cd8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 4 Mar 2020 22:04:02 +1100 Subject: selftests/powerpc: Add a test of sigreturn vs VDSO There's two different paths through the sigreturn code, depending on whether the VDSO is mapped or not. We recently discovered a bug in the unmapped case, because it's not commonly used these days.
So add a test that sends itself a signal, then moves the VDSO, takes another signal and finally unmaps the VDSO before sending itself another signal. That tests the standard signal path, the code that handles the VDSO being moved, and also the signal path in the case where the VDSO is unmapped. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200304110402.6038-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/signal/.gitignore | 1 + tools/testing/selftests/powerpc/signal/Makefile | 2 +- .../selftests/powerpc/signal/sigreturn_vdso.c | 127 +++++++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/signal/sigreturn_vdso.c diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index dca5852a1546..03dafa795255 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -1,3 +1,4 @@ signal signal_tm sigfuz +sigreturn_vdso diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index 113838fbbe7f..63b57583e07d 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := signal signal_tm sigfuz +TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c new file mode 100644 index 000000000000..e282fff0fe25 --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that we can take signals with and without the VDSO mapped, which trigger + * different paths in the signal handling code. 
+ * + * See handle_rt_signal64() and setup_trampoline() in signal_64.c + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +// Ensure assert() is not compiled out +#undef NDEBUG +#include + +#include "utils.h" + +static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high) +{ + unsigned long start, end; + static char buf[4096]; + char name[128]; + FILE *f; + int rc = -1; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(buf, sizeof(buf), f)) { + rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n", + &start, &end, name); + if (rc == 2) + continue; + + if (rc != 3) { + printf("sscanf errored\n"); + rc = -1; + break; + } + + if (strstr(name, needle)) { + *low = start; + *high = end - 1; + rc = 0; + break; + } + } + + fclose(f); + + return rc; +} + +static volatile sig_atomic_t took_signal = 0; + +static void sigusr1_handler(int sig) +{ + took_signal++; +} + +int test_sigreturn_vdso(void) +{ + unsigned long low, high, size; + struct sigaction act; + char *p; + + act.sa_handler = sigusr1_handler; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + assert(sigaction(SIGUSR1, &act, NULL) == 0); + + // Confirm the VDSO is mapped, and work out where it is + assert(search_proc_maps("[vdso]", &low, &high) == 0); + size = high - low + 1; + printf("VDSO is at 0x%lx-0x%lx (%lu bytes)\n", low, high, size); + + kill(getpid(), SIGUSR1); + assert(took_signal == 1); + printf("Signal delivered OK with VDSO mapped\n"); + + // Remap the VDSO somewhere else + p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(p != MAP_FAILED); + assert(mremap((void *)low, size, size, MREMAP_MAYMOVE|MREMAP_FIXED, p) != MAP_FAILED); + assert(search_proc_maps("[vdso]", &low, &high) == 0); + size = high - low + 1; + printf("VDSO moved to 0x%lx-0x%lx (%lu bytes)\n", low, high, size); + + kill(getpid(), SIGUSR1); + 
assert(took_signal == 2); + printf("Signal delivered OK with VDSO moved\n"); + + assert(munmap((void *)low, size) == 0); + printf("Unmapped VDSO\n"); + + // Confirm the VDSO is not mapped anymore + assert(search_proc_maps("[vdso]", &low, &high) != 0); + + // Make the stack executable + assert(search_proc_maps("[stack]", &low, &high) == 0); + size = high - low + 1; + mprotect((void *)low, size, PROT_READ|PROT_WRITE|PROT_EXEC); + printf("Remapped the stack executable\n"); + + kill(getpid(), SIGUSR1); + assert(took_signal == 3); + printf("Signal delivered OK with VDSO unmapped\n"); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_vdso, "sigreturn_vdso"); +} -- cgit v1.2.3-59-g8ed1b From 61da50b76b62fd815aa82d853bf82bf4f69568f5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 1 Mar 2020 22:17:38 +1100 Subject: powerpc/kuap: PPC_KUAP_DEBUG should depend on PPC_KUAP Currently you can enable PPC_KUAP_DEBUG when PPC_KUAP is disabled, even though the former has not effect without the latter. Fix it so that PPC_KUAP_DEBUG can only be enabled when PPC_KUAP is enabled, not when the platform could support KUAP (PPC_HAVE_KUAP). Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200301111738.22497-1-mpe@ellerman.id.au --- arch/powerpc/platforms/Kconfig.cputype | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 4208724e9f28..0c3c1902135c 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -397,7 +397,7 @@ config PPC_KUAP config PPC_KUAP_DEBUG bool "Extra debugging for Kernel Userspace Access Protection" - depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32) + depends on PPC_KUAP && (PPC_RADIX_MMU || PPC_32) help Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. 
-- cgit v1.2.3-59-g8ed1b From 915b7f6f9a5e232c138bb36743a1fdb0fcf2c432 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 11 Feb 2020 00:38:30 -0300 Subject: selftests/powerpc: Add tm-signal-pagefault test This test triggers a TM Bad Thing by raising a signal in transactional state and forcing a pagefault to happen in kernelspace when the kernel signal handling code first touches the user signal stack. This is inspired by the test tm-signal-context-force-tm but uses userfaultfd to make the test deterministic. While this test always triggers the bug in one run, I had to execute tm-signal-context-force-tm several times (the test runs 5000 times each execution) to trigger the same bug. tm-signal-context-force-tm is kept instead of replaced because, while this test is more reliable and triggers the same bug, tm-signal-context-force-tm has a better coverage, in the sense that by running the test several times it might trigger the pagefault and/or be preempted at different places. v3: skip test if userfaultfd is unavailable. 
Signed-off-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200211033831.11165-2-gustavold@linux.ibm.com --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 3 +- .../selftests/powerpc/tm/tm-signal-pagefault.c | 284 +++++++++++++++++++++ 3 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 98f2708d86cc..e1c72a4a3e91 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -13,6 +13,7 @@ tm-signal-context-chk-vmx tm-signal-context-chk-vsx tm-signal-context-force-tm tm-signal-sigreturn-nt +tm-signal-pagefault tm-vmx-unavail tm-unavailable tm-trap diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index b15a1a325bd0..b1d99736f8b8 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ - tm-signal-context-force-tm tm-poison + tm-signal-context-force-tm tm-poison tm-signal-pagefault top_srcdir = ../../../../.. 
include ../../lib.mk @@ -22,6 +22,7 @@ $(OUTPUT)/tm-resched-dscr: ../pmu/lib.c $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 $(OUTPUT)/tm-signal-context-force-tm: CFLAGS += -pthread -m64 +$(OUTPUT)/tm-signal-pagefault: CFLAGS += -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c new file mode 100644 index 000000000000..5908bc6abe60 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020, Gustavo Luiz Duarte, IBM Corp. + * + * This test starts a transaction and triggers a signal, forcing a pagefault to + * happen when the kernel signal handling code touches the user signal stack. + * + * In order to avoid pre-faulting the signal stack memory and to force the + * pagefault to happen precisely in the kernel signal handling code, the + * pagefault handling is done in userspace using the userfaultfd facility. + * + * Further pagefaults are triggered by crafting the signal handler's ucontext + * to point to additional memory regions managed by the userfaultfd, so using + * the same mechanism used to avoid pre-faulting the signal stack memory. + * + * On failure (bug is present) kernel crashes or never returns control back to + * userspace. If bug is not present, the test completes almost immediately. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tm.h" + + +#define UF_MEM_SIZE 655360 /* 10 x 64k pages */ + +/* Memory handled by userfaultfd */ +static char *uf_mem; +static size_t uf_mem_offset = 0; + +/* + * Data that will be copied into the faulting pages (instead of zero-filled + * pages). This is used to make the test more reliable and avoid segfaulting + * when we return from the signal handler. Since we are making the signal + * handler's ucontext point to newly allocated memory, when that memory is + * paged-in it will contain the expected content. + */ +static char backing_mem[UF_MEM_SIZE]; + +static size_t pagesize; + +/* + * Return a chunk of at least 'size' bytes of memory that will be handled by + * userfaultfd. If 'backing_data' is not NULL, its content will be save to + * 'backing_mem' and then copied into the faulting pages when the page fault + * is handled. + */ +void *get_uf_mem(size_t size, void *backing_data) +{ + void *ret; + + if (uf_mem_offset + size > UF_MEM_SIZE) { + fprintf(stderr, "Requesting more uf_mem than expected!\n"); + exit(EXIT_FAILURE); + } + + ret = &uf_mem[uf_mem_offset]; + + /* Save the data that will be copied into the faulting page */ + if (backing_data != NULL) + memcpy(&backing_mem[uf_mem_offset], backing_data, size); + + /* Reserve the requested amount of uf_mem */ + uf_mem_offset += size; + /* Keep uf_mem_offset aligned to the page size (round up) */ + uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1); + + return ret; +} + +void *fault_handler_thread(void *arg) +{ + struct uffd_msg msg; /* Data read from userfaultfd */ + long uffd; /* userfaultfd file descriptor */ + struct uffdio_copy uffdio_copy; + struct pollfd pollfd; + ssize_t nread, offset; + + uffd = (long) arg; + + for (;;) { + pollfd.fd = uffd; + pollfd.events = POLLIN; + if (poll(&pollfd, 1, -1) == -1) { + perror("poll() failed"); + 
exit(EXIT_FAILURE); + } + + nread = read(uffd, &msg, sizeof(msg)); + if (nread == 0) { + fprintf(stderr, "read(): EOF on userfaultfd\n"); + exit(EXIT_FAILURE); + } + + if (nread == -1) { + perror("read() failed"); + exit(EXIT_FAILURE); + } + + /* We expect only one kind of event */ + if (msg.event != UFFD_EVENT_PAGEFAULT) { + fprintf(stderr, "Unexpected event on userfaultfd\n"); + exit(EXIT_FAILURE); + } + + /* + * We need to handle page faults in units of pages(!). + * So, round faulting address down to page boundary. + */ + uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1); + + offset = (char *) uffdio_copy.dst - uf_mem; + uffdio_copy.src = (unsigned long) &backing_mem[offset]; + + uffdio_copy.len = pagesize; + uffdio_copy.mode = 0; + uffdio_copy.copy = 0; + if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { + perror("ioctl-UFFDIO_COPY failed"); + exit(EXIT_FAILURE); + } + } +} + +void setup_uf_mem(void) +{ + long uffd; /* userfaultfd file descriptor */ + pthread_t thr; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + int ret; + + pagesize = sysconf(_SC_PAGE_SIZE); + + /* Create and enable userfaultfd object */ + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd == -1) { + perror("userfaultfd() failed"); + exit(EXIT_FAILURE); + } + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { + perror("ioctl-UFFDIO_API failed"); + exit(EXIT_FAILURE); + } + + /* + * Create a private anonymous mapping. The memory will be demand-zero + * paged, that is, not yet allocated. When we actually touch the memory + * the related page will be allocated via the userfaultfd mechanism. 
+ */ + uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (uf_mem == MAP_FAILED) { + perror("mmap() failed"); + exit(EXIT_FAILURE); + } + + /* + * Register the memory range of the mapping we've just mapped to be + * handled by the userfaultfd object. In 'mode' we request to track + * missing pages (i.e. pages that have not yet been faulted-in). + */ + uffdio_register.range.start = (unsigned long) uf_mem; + uffdio_register.range.len = UF_MEM_SIZE; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { + perror("ioctl-UFFDIO_REGISTER"); + exit(EXIT_FAILURE); + } + + /* Create a thread that will process the userfaultfd events */ + ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); + if (ret != 0) { + fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret); + exit(EXIT_FAILURE); + } +} + +/* + * Assumption: the signal was delivered while userspace was in transactional or + * suspended state, i.e. uc->uc_link != NULL. + */ +void signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = uc; + + /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */ + ucp->uc_link->uc_mcontext.regs->nip += 4; + + ucp->uc_mcontext.v_regs = + get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs); + + ucp->uc_link->uc_mcontext.v_regs = + get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs); + + ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link); +} + +bool have_userfaultfd(void) +{ + long rc; + + errno = 0; + rc = syscall(__NR_userfaultfd, -1); + + return rc == 0 || errno != ENOSYS; +} + +int tm_signal_pagefault(void) +{ + struct sigaction sa; + stack_t ss; + + SKIP_IF(!have_htm()); + SKIP_IF(!have_userfaultfd()); + + setup_uf_mem(); + + /* + * Set an alternative stack that will generate a page fault when the + * signal is raised. The page fault will be treated via userfaultfd, + * i.e. 
via fault_handler_thread. + */ + ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; + if (sigaltstack(&ss, NULL) == -1) { + perror("sigaltstack() failed"); + exit(EXIT_FAILURE); + } + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sa.sa_sigaction = signal_handler; + if (sigaction(SIGTRAP, &sa, NULL) == -1) { + perror("sigaction() failed"); + exit(EXIT_FAILURE); + } + + /* Trigger a SIGTRAP in transactional state */ + asm __volatile__( + "tbegin.;" + "beq 1f;" + "trap;" + "1: ;" + : : : "memory"); + + /* Trigger a SIGTRAP in suspended state */ + asm __volatile__( + "tbegin.;" + "beq 1f;" + "tsuspend.;" + "trap;" + "tresume.;" + "1: ;" + : : : "memory"); + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + /* + * Depending on kernel config, the TM Bad Thing might not result in a + * crash, instead the kernel never returns control back to userspace, so + * set a tight timeout. If the test passes it completes almost + * immediately. + */ + test_harness_set_timeout(2); + return test_harness(tm_signal_pagefault, "tm_signal_pagefault"); +} -- cgit v1.2.3-59-g8ed1b From 0f8f554e5244f56f496b4ce30ada1126fe290345 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 11 Feb 2020 00:38:31 -0300 Subject: selftests/powerpc: Don't rely on segfault to rerun the test The test case tm-signal-context-force-tm expects a segfault to happen on returning from signal handler, and then does a setcontext() to run the test again. However, the test doesn't always segfault, causing the test to run a single time. This patch fixes the test by putting it within a loop and jumping, via setcontext, just prior to the loop in case it segfaults. This way we get the desired behavior (run the test COUNT_MAX times) regardless if it segfaults or not. This also reduces the use of setcontext for control flow logic, keeping it only in the segfault handler. 
Also, since 'count' is changed within the signal handler, it is declared as volatile to prevent any compiler optimization getting confused with asynchronous changes. Signed-off-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200211033831.11165-3-gustavold@linux.ibm.com --- .../powerpc/tm/tm-signal-context-force-tm.c | 74 ++++++++++------------ 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 31717625f318..421cb082f6be 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -42,9 +42,10 @@ #endif /* Setting contexts because the test will crash and we want to recover */ -ucontext_t init_context, main_context; +ucontext_t init_context; -static int count, first_time; +/* count is changed in the signal handler, so it must be volatile */ +static volatile int count; void usr_signal_handler(int signo, siginfo_t *si, void *uc) { @@ -98,11 +99,6 @@ void usr_signal_handler(int signo, siginfo_t *si, void *uc) void seg_signal_handler(int signo, siginfo_t *si, void *uc) { - if (count == COUNT_MAX) { - /* Return to tm_signal_force_msr() and exit */ - setcontext(&main_context); - } - count++; /* Reexecute the test */ @@ -126,37 +122,41 @@ void tm_trap_test(void) */ getcontext(&init_context); - /* Allocated an alternative signal stack area */ - ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - ss.ss_size = SIGSTKSZ; - ss.ss_flags = 0; + while (count < COUNT_MAX) { + /* Allocated an alternative signal stack area */ + ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + ss.ss_size = SIGSTKSZ; + ss.ss_flags = 0; - if (ss.ss_sp == (void *)-1) { - perror("mmap error\n"); - exit(-1); - } + if (ss.ss_sp == (void *)-1) { 
+ perror("mmap error\n"); + exit(-1); + } - /* Force the allocation through a page fault */ - if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) { - perror("madvise\n"); - exit(-1); - } + /* Force the allocation through a page fault */ + if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) { + perror("madvise\n"); + exit(-1); + } - /* Setting an alternative stack to generate a page fault when - * the signal is raised. - */ - if (sigaltstack(&ss, NULL)) { - perror("sigaltstack\n"); - exit(-1); + /* + * Setting an alternative stack to generate a page fault when + * the signal is raised. + */ + if (sigaltstack(&ss, NULL)) { + perror("sigaltstack\n"); + exit(-1); + } + + /* The signal handler will enable MSR_TS */ + sigaction(SIGUSR1, &usr_sa, NULL); + /* If it does not crash, it might segfault, avoid it to retest */ + sigaction(SIGSEGV, &seg_sa, NULL); + + raise(SIGUSR1); + count++; } - - /* The signal handler will enable MSR_TS */ - sigaction(SIGUSR1, &usr_sa, NULL); - /* If it does not crash, it will segfault, avoid it to retest */ - sigaction(SIGSEGV, &seg_sa, NULL); - - raise(SIGUSR1); } int tm_signal_context_force_tm(void) @@ -169,11 +169,7 @@ int tm_signal_context_force_tm(void) */ SKIP_IF(!is_ppc64le()); - /* Will get back here after COUNT_MAX interactions */ - getcontext(&main_context); - - if (!first_time++) - tm_trap_test(); + tm_trap_test(); return EXIT_SUCCESS; } -- cgit v1.2.3-59-g8ed1b From c5e76fa05b2df519b9f08571cc57e623c1569faa Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Fri, 6 Mar 2020 20:45:47 -0600 Subject: powerpc/pseries: Fix of_read_drc_info_cell() to point at next record The expectation is that when calling of_read_drc_info_cell() repeatedly to parse multiple drc-info records that the in/out curval parameter points at the start of the next record on return. However, the current behavior has curval still pointing at the final value of the record just parsed. 
The result of which is that if the ibm,drc-info property contains multiple properties the parsed value of the drc_type for any record after the first has the power_domain value of the previous record appended to the type string. eg: observed the following 0xffffffff prepended to PHB drc-info: type: \xff\xff\xff\xffPHB, prefix: PHB , index_start: 0x20000001 drc-info: suffix_start: 1, sequential_elems: 3072, sequential_inc: 1 drc-info: power-domain: 0xffffffff, last_index: 0x20000c00 In practice PHBs are the only type of connector in the ibm,drc-info property that has multiple records. So, it breaks PHB hotplug, but by chance not PCI, CPU, slot, or memory because they happen to only ever be a single record. Fix by incrementing curval past the power_domain value to point at drc_type string of next record. Fixes: e83636ac3334 ("pseries/drc-info: Search DRC properties for CPU indexes") Signed-off-by: Tyrel Datwyler Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200307024547.5748-1-tyreld@linux.ibm.com --- arch/powerpc/platforms/pseries/of_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c index 66dfd8256712..23241c71ef37 100644 --- a/arch/powerpc/platforms/pseries/of_helpers.c +++ b/arch/powerpc/platforms/pseries/of_helpers.c @@ -88,7 +88,7 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval, return -EINVAL; /* Should now know end of current entry */ - (*curval) = (void *)p2; + (*curval) = (void *)(++p2); data->last_drc_index = data->drc_index_start + ((data->num_sequential_elems - 1) * data->sequential_inc); -- cgit v1.2.3-59-g8ed1b From 3e74a0e16342626511c43937c120beb990539307 Mon Sep 17 00:00:00 2001 From: Balamuruhan S Date: Wed, 11 Mar 2020 15:54:05 +0530 Subject: powerpc/sstep: Fix DS operand in ld encoding to appropriate value ld instruction should have 14 bit immediate field (DS) 
concatenated with 0b00 on the right, encode it accordingly. Introduce macro `IMM_DS()` to encode DS form instructions with 14 bit immediate field. Fixes: 4ceae137bdab ("powerpc: emulate_step() tests for load/store instructions") Reviewed-by: Sandipan Das Signed-off-by: Balamuruhan S Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200311102405.392263-1-bala24@linux.ibm.com --- arch/powerpc/lib/test_emulate_step.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 00d70253cb5b..53df4146dd32 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -13,19 +13,20 @@ #include #define IMM_L(i) ((uintptr_t)(i) & 0xffff) +#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) /* * Defined with TEST_ prefix so it does not conflict with other * definitions. */ #define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) + ___PPC_RA(base) | IMM_DS(i)) #define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b)) #define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) + ___PPC_RA(base) | IMM_DS(i)) #define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b) | \ __PPC_EH(eh)) -- cgit v1.2.3-59-g8ed1b From addf3727ad28bd159ae2da433b48daf2ffb339f7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 Mar 2020 21:51:31 -0700 Subject: powerpc/cell: Use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Signed-off-by: Joe Perches Acked-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/03073a9a269010ca439e9e658629c44602b0cc9f.1583896348.git.joe@perches.com --- arch/powerpc/platforms/cell/spufs/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 5c3f5d088c3b..d56b4e3241cd 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -177,7 +177,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == MFC_CNTL_SUSPEND_COMPLETE); - /* fall through */ + fallthrough; case MFC_CNTL_SUSPEND_COMPLETE: if (csa) csa->priv2.mfc_control_RW = -- cgit v1.2.3-59-g8ed1b From b4f00d5b2098320a0d4c4a6d31099bc0c9a85b02 Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Thu, 12 Mar 2020 12:12:55 +0530 Subject: powerpc: Replace setup_irq() by request_irq() request_irq() is preferred over setup_irq(). Invocations of setup_irq() occur after memory allocators are ready. Per tglx[1], setup_irq() existed in olden days when allocators were not ready by the time early interrupts were initialized. Hence replace setup_irq() by request_irq(). 
[1] https://lkml.kernel.org/r/alpine.DEB.2.20.1710191609480.1971@nanos Signed-off-by: afzal mohammed Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200312064256.18735-1-afzal.mohd.ma@gmail.com --- arch/powerpc/platforms/85xx/mpc85xx_cds.c | 11 ++++------- arch/powerpc/platforms/8xx/cpm1.c | 9 ++------- arch/powerpc/platforms/8xx/m8xx_setup.c | 9 ++------- arch/powerpc/platforms/chrp/setup.c | 14 +++++--------- arch/powerpc/platforms/powermac/pic.c | 29 +++++++++++------------------ arch/powerpc/platforms/powermac/smp.c | 12 +++++------- 6 files changed, 29 insertions(+), 55 deletions(-) diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 6b1436abe9b1..915ab6710b93 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -218,12 +218,6 @@ static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id) { return IRQ_HANDLED; } - -static struct irqaction mpc85xxcds_8259_irqaction = { - .handler = mpc85xx_8259_cascade_action, - .flags = IRQF_SHARED | IRQF_NO_THREAD, - .name = "8259 cascade", -}; #endif /* PPC_I8259 */ #endif /* CONFIG_PCI */ @@ -271,7 +265,10 @@ static int mpc85xx_cds_8259_attach(void) * disabled when the last user of the shared IRQ line frees their * interrupt. 
*/ - if ((ret = setup_irq(cascade_irq, &mpc85xxcds_8259_irqaction))) { + ret = request_irq(cascade_irq, mpc85xx_8259_cascade_action, + IRQF_SHARED | IRQF_NO_THREAD, "8259 cascade", + cascade_node); + if (ret) { printk(KERN_ERR "Failed to setup cascade interrupt\n"); return ret; } diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index a43ee7d1ff85..4db4ca2e1222 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -120,12 +120,6 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev) return IRQ_HANDLED; } -static struct irqaction cpm_error_irqaction = { - .handler = cpm_error_interrupt, - .flags = IRQF_NO_THREAD, - .name = "error", -}; - static const struct irq_domain_ops cpm_pic_host_ops = { .map = cpm_pic_host_map, }; @@ -187,7 +181,8 @@ unsigned int __init cpm_pic_init(void) if (!eirq) goto end; - if (setup_irq(eirq, &cpm_error_irqaction)) + if (request_irq(eirq, cpm_error_interrupt, IRQF_NO_THREAD, "error", + NULL)) printk(KERN_ERR "Could not allocate CPM error IRQ!"); setbits32(&cpic_reg->cpic_cicr, CICR_IEN); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index f1c805c8adbc..df4d57d07f9a 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -39,12 +39,6 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) return IRQ_HANDLED; } -static struct irqaction tbint_irqaction = { - .handler = timebase_interrupt, - .flags = IRQF_NO_THREAD, - .name = "tbint", -}; - /* per-board overridable init_internal_rtc() function. 
*/ void __init __attribute__ ((weak)) init_internal_rtc(void) @@ -157,7 +151,8 @@ void __init mpc8xx_calibrate_decr(void) (TBSCR_TBF | TBSCR_TBE)); immr_unmap(sys_tmr2); - if (setup_irq(virq, &tbint_irqaction)) + if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint", + NULL)) panic("Could not allocate timer IRQ!"); } diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index fcf6f2342ef4..8328cd5817b0 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -451,13 +451,6 @@ static void __init chrp_find_openpic(void) of_node_put(np); } -#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON) -static struct irqaction xmon_irqaction = { - .handler = xmon_irq, - .name = "XMON break", -}; -#endif - static void __init chrp_find_8259(void) { struct device_node *np, *pic = NULL; @@ -541,8 +534,11 @@ static void __init chrp_init_IRQ(void) if (of_node_is_type(kbd->parent, "adb")) break; of_node_put(kbd); - if (kbd) - setup_irq(HYDRA_INT_ADB_NMI, &xmon_irqaction); + if (kbd) { + if (request_irq(HYDRA_INT_ADB_NMI, xmon_irq, 0, "XMON break", + NULL)) + pr_err("Failed to register XMON break interrupt\n"); + } #endif } diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 2e969073473d..4921bccf0376 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -250,20 +250,6 @@ static unsigned int pmac_pic_get_irq(void) return irq_linear_revmap(pmac_pic_host, irq); } -#ifdef CONFIG_XMON -static struct irqaction xmon_action = { - .handler = xmon_irq, - .flags = IRQF_NO_THREAD, - .name = "NMI - XMON" -}; -#endif - -static struct irqaction gatwick_cascade_action = { - .handler = gatwick_action, - .flags = IRQF_NO_THREAD, - .name = "cascade", -}; - static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { @@ -384,12 +370,17 @@ static void 
__init pmac_pic_probe_oldstyle(void) out_le32(&pmac_irq_hw[i]->enable, 0); /* Hookup cascade irq */ - if (slave && pmac_irq_cascade) - setup_irq(pmac_irq_cascade, &gatwick_cascade_action); + if (slave && pmac_irq_cascade) { + if (request_irq(pmac_irq_cascade, gatwick_action, + IRQF_NO_THREAD, "cascade", NULL)) + pr_err("Failed to register cascade interrupt\n"); + } printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs); #ifdef CONFIG_XMON - setup_irq(irq_create_mapping(NULL, 20), &xmon_action); + i = irq_create_mapping(NULL, 20); + if (request_irq(i, xmon_irq, IRQF_NO_THREAD, "NMI - XMON", NULL)) + pr_err("Failed to register NMI-XMON interrupt\n"); #endif } @@ -441,7 +432,9 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) nmi_irq = irq_of_parse_and_map(pswitch, 0); if (nmi_irq) { mpic_irq_set_priority(nmi_irq, 9); - setup_irq(nmi_irq, &xmon_action); + if (request_irq(nmi_irq, xmon_irq, IRQF_NO_THREAD, + "NMI - XMON", NULL)) + pr_err("Failed to register NMI-XMON interrupt\n"); } of_node_put(pswitch); } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index d2900689d642..be2ab5b11e57 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -399,21 +399,19 @@ static int __init smp_psurge_kick_cpu(int nr) return 0; } -static struct irqaction psurge_irqaction = { - .handler = psurge_ipi_intr, - .flags = IRQF_PERCPU | IRQF_NO_THREAD, - .name = "primary IPI", -}; - static void __init smp_psurge_setup_cpu(int cpu_nr) { + unsigned long flags = IRQF_PERCPU | IRQF_NO_THREAD; + int irq; + if (cpu_nr != 0 || !psurge_start) return; /* reset the entry point so if we get another intr we won't * try to startup again */ out_be32(psurge_start, 0x100); - if (setup_irq(irq_create_mapping(NULL, 30), &psurge_irqaction)) + irq = irq_create_mapping(NULL, 30); + if (request_irq(irq, psurge_ipi_intr, flags, "primary IPI", NULL)) printk(KERN_ERR "Couldn't get primary IPI 
interrupt"); } -- cgit v1.2.3-59-g8ed1b From 74bb84e5117146fa73eb9d01305975c53022b3c3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 12 Mar 2020 18:44:04 +1100 Subject: powerpc/prom_init: Pass the "os-term" message to hypervisor The "os-term" RTAS call takes one argument: the address of a message giving the cause of the OS termination. rtas_os_term() already passes it but the recently added prom_init's version of that missed it; it also does not fill args correctly. This passes the message address and initializes the number of arguments. Fixes: 6a9c930bd775 ("powerpc/prom_init: Add the ESM call to prom_init") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200312074404.87293-1-aik@ozlabs.ru --- arch/powerpc/kernel/prom_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 577345382b23..673f13b87db1 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1773,6 +1773,9 @@ static void __init prom_rtas_os_term(char *str) if (token == 0) prom_panic("Could not get token for ibm,os-term\n"); os_term_args.token = cpu_to_be32(token); + os_term_args.nargs = cpu_to_be32(1); + os_term_args.nret = cpu_to_be32(1); + os_term_args.args[0] = cpu_to_be32(__pa(str)); prom_rtas_hcall((uint64_t)&os_term_args); } #endif /* CONFIG_PPC_SVM */ -- cgit v1.2.3-59-g8ed1b From d64c7dbb4d98306b794401ca924ad053f84b59f8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 19 Feb 2020 22:00:07 +1100 Subject: powerpc/xmon: Lower limits on nidump and ndump In xmon we have two variables that are used by the dump commands. There's ndump which is the number of bytes to dump using 'd', and nidump which is the number of instructions to dump using 'di'. ndump starts as 64 and nidump starts as 16, but both can be set by the user. 
It's fairly common to be pasting addresses into xmon when trying to debug something, and if you inadvertently double paste an address like so: 0:mon> di c000000002101f6c c000000002101f6c The second value is interpreted as the number of instructions to dump. Luckily it doesn't dump 13 quintillion instructions; the value is limited to MAX_DUMP (128K). But as each instruction is dumped on a single line, that's still a lot of output. If you're on a slow console, that can take multiple minutes to print. If you were "just popping in and out of xmon quickly before the RCU/hardlockup detector fires" you are now having a bad day. Things are not as bad with 'd' because we print 16 bytes per line, so it's fewer lines. But it's still quite a lot. So shrink the maximum for 'd' to 64K (one page), which is 4096 lines. For 'di' add a new limit which is the above / 4 - because instructions are 4 bytes, meaning again we can dump one page. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200219110007.31195-1-mpe@ellerman.id.au --- arch/powerpc/xmon/xmon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0ec9640335bb..ea303b7e4e29 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -81,8 +81,9 @@ static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE); static unsigned long adrs; static int size = 1; -#define MAX_DUMP (128 * 1024) +#define MAX_DUMP (64 * 1024) static unsigned long ndump = 64; +#define MAX_IDUMP (MAX_DUMP >> 2) static unsigned long nidump = 16; static unsigned long ncsum = 4096; static int termch; @@ -2756,8 +2757,8 @@ dump(void) scanhex(&nidump); if (nidump == 0) nidump = 16; - else if (nidump > MAX_DUMP) - nidump = MAX_DUMP; + else if (nidump > MAX_IDUMP) + nidump = MAX_IDUMP; adrs += ppc_inst_dump(adrs, nidump, 1); last_cmd = "di\n"; } else if (c == 'l') { -- cgit v1.2.3-59-g8ed1b From 6eeb9b3b9ce588f14a697737a30d0702b5a20293 Mon Sep 17 
00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 14:13:28 +1100 Subject: powerpc/64s: Fix section mismatch warnings from boot code We currently have two section mismatch warnings: The function __boot_from_prom() references the function __init prom_init(). The function start_here_common() references the function __init start_kernel(). The warnings are correct, we do have branches from non-init code into init code, which is freed after boot. But we don't expect to ever execute any of that early boot code after boot, if we did that would be a bug. In particular calling into OF after boot would be fatal because OF is no longer resident. So for now fix the warnings by marking the relevant functions as __REF, which puts them in the ".ref.text" section. This causes some reordering of the functions in the final link: @@ -217,10 +217,9 @@ c00000000000b088 t generic_secondary_common_init c00000000000b124 t __mmu_off c00000000000b14c t __start_initialization_multiplatform -c00000000000b1ac t __boot_from_prom -c00000000000b1ec t __after_prom_start -c00000000000b260 t p_end -c00000000000b27c T copy_and_flush +c00000000000b1ac t __after_prom_start +c00000000000b220 t p_end +c00000000000b23c T copy_and_flush c00000000000b300 T __secondary_start c00000000000b300 t copy_to_here c00000000000b344 t start_secondary_prolog @@ -228,8 +227,9 @@ c00000000000b36c t enable_64b_mode c00000000000b388 T relative_toc c00000000000b3a8 t p_toc -c00000000000b3b0 t start_here_common -c00000000000b3d0 t start_here_multiplatform +c00000000000b3b0 t __boot_from_prom +c00000000000b3f0 t start_here_multiplatform +c00000000000b480 t start_here_common c00000000000b880 T system_call_common c00000000000b974 t system_call c00000000000b9dc t system_call_exit In particular __boot_from_prom moves after copy_to_here, which means it's not copied to zero in the first stage of copy of the kernel to zero. 
But that's OK, because we only call __boot_from_prom before we do the copy, so it makes no difference when it's copied. The call sequence is: __start -> __start_initialization_multiplatform -> __boot_from_prom -> __start -> __start_initialization_multiplatform -> __after_prom_start -> copy_and_flush -> copy_and_flush (relocated to 0) -> start_here_multiplatform -> early_setup Reported-by: Mauricio Faria de Oliveira Reported-by: Roman Bolshakov Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225031328.14676-1-mpe@ellerman.id.au --- arch/powerpc/kernel/head_64.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ad79fddb974d..ddfbd02140d9 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -537,6 +537,7 @@ __start_initialization_multiplatform: b __after_prom_start #endif /* CONFIG_PPC_BOOK3E */ +__REF __boot_from_prom: #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE /* Save parameters */ @@ -574,6 +575,7 @@ __boot_from_prom: /* We never return. We also hit that trap if trying to boot * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */ trap + .previous __after_prom_start: #ifdef CONFIG_RELOCATABLE @@ -977,7 +979,6 @@ start_here_multiplatform: RFI b . /* prevent speculative execution */ - .previous /* This is where all platforms converge execution */ start_here_common: @@ -1001,6 +1002,7 @@ start_here_common: /* Not reached */ trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 + .previous /* * We put a few things here that have to be page-aligned. -- cgit v1.2.3-59-g8ed1b From eb4f8e259acc37b91b62ca57e0d3c8960c357843 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 19 Feb 2020 08:05:57 +0000 Subject: powerpc/kprobes: Remove redundant code At the time being we have something like if (something) { p = get(); if (p) { if (something_wrong) goto out; ... return; } else if (a != b) { if (some_error) goto out; ... 
} goto out; } p = get(); if (!p) { if (a != b) { if (some_error) goto out; ... } goto out; } This is similar to p = get(); if (!p) { if (a != b) { if (some_error) goto out; ... } goto out; } if (something) { if (something_wrong) goto out; ... return; } Signed-off-by: Christophe Leroy [mpe: Reflow the comment that was moved] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/07a17425743600460ce35fa9432d42487a825583.1582099499.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/kprobes.c | 81 ++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 48 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2d27ec4feee4..3aaff3365134 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -271,54 +271,6 @@ int kprobe_handler(struct pt_regs *regs) preempt_disable(); kcb = get_kprobe_ctlblk(); - /* Check we're not actually recursing */ - if (kprobe_running()) { - p = get_kprobe(addr); - if (p) { - kprobe_opcode_t insn = *p->ainsn.insn; - if (kcb->kprobe_status == KPROBE_HIT_SS && - is_trap(insn)) { - /* Turn off 'trace' bits */ - regs->msr &= ~MSR_SINGLESTEP; - regs->msr |= kcb->kprobe_saved_msr; - goto no_kprobe; - } - /* We have reentered the kprobe_handler(), since - * another probe was hit while within the handler. - * We here save the original kprobes variables and - * just single step on the instruction of the new probe - * without calling any user handlers. 
- */ - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kprobes_inc_nmissed_count(p); - kcb->kprobe_status = KPROBE_REENTER; - if (p->ainsn.boostable >= 0) { - ret = try_to_emulate(p, regs); - - if (ret > 0) { - restore_previous_kprobe(kcb); - preempt_enable_no_resched(); - return 1; - } - } - prepare_singlestep(p, regs); - return 1; - } else if (*addr != BREAKPOINT_INSTRUCTION) { - /* If trap variant, then it belongs not to us */ - kprobe_opcode_t cur_insn = *addr; - - if (is_trap(cur_insn)) - goto no_kprobe; - /* The breakpoint instruction was removed by - * another cpu right after we hit, no further - * handling of this interrupt is appropriate - */ - ret = 1; - } - goto no_kprobe; - } - p = get_kprobe(addr); if (!p) { if (*addr != BREAKPOINT_INSTRUCTION) { @@ -343,6 +295,39 @@ int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } + /* Check we're not actually recursing */ + if (kprobe_running()) { + kprobe_opcode_t insn = *p->ainsn.insn; + if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { + /* Turn off 'trace' bits */ + regs->msr &= ~MSR_SINGLESTEP; + regs->msr |= kcb->kprobe_saved_msr; + goto no_kprobe; + } + + /* + * We have reentered the kprobe_handler(), since another probe + * was hit while within the handler. We here save the original + * kprobes variables and just single step on the instruction of + * the new probe without calling any user handlers. 
+ */ save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kprobes_inc_nmissed_count(p); + kcb->kprobe_status = KPROBE_REENTER; + if (p->ainsn.boostable >= 0) { + ret = try_to_emulate(p, regs); + + if (ret > 0) { + restore_previous_kprobe(kcb); + preempt_enable_no_resched(); + return 1; + } + } + prepare_singlestep(p, regs); + return 1; + } + kcb->kprobe_status = KPROBE_HIT_ACTIVE; set_current_kprobe(p, regs, kcb); if (p->pre_handler && p->pre_handler(p, regs)) { -- cgit v1.2.3-59-g8ed1b From af92bad615be75c6c0d1b1c5b48178360250a187 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 6 Mar 2020 15:09:40 +0000 Subject: powerpc/kasan: Fix kasan_remap_early_shadow_ro() At the moment kasan_remap_early_shadow_ro() does nothing, because k_end is 0 and the loop condition k_cur < k_end (i.e. k_cur < 0) is always false. Change the test to k_cur != k_end, as done in kasan_init_shadow_page_tables(). Signed-off-by: Christophe Leroy Fixes: cbd18991e24f ("powerpc/mm: Fix an Oops in kasan_mmu_init()") Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4e7b56865e01569058914c991143f5961b5d4719.1583507333.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/kasan/kasan_init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index f19526e7d3dc..1a29cf469903 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -101,7 +101,7 @@ static void __init kasan_remap_early_shadow_ro(void) kasan_populate_pte(kasan_early_shadow_pte, prot); - for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_ptr_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); -- cgit v1.2.3-59-g8ed1b From 697ece78f8f749aeea40f2711389901f0974017a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 10 Mar 2020 17:29:12 +0000 Subject: powerpc/32s: reorder
Linux PTE bits to better match Hash PTE bits. Reorder Linux PTE bits to (almost) match Hash PTE bits. RW Kernel : PP = 00 RO Kernel : PP = 00 RW User : PP = 01 RO User : PP = 11 So naturally, we should have _PAGE_USER = 0x001 _PAGE_RW = 0x002 Today 0x001 and 0x002 are _PAGE_PRESENT and _PAGE_HASHPTE, which are both software-only bits. Switch _PAGE_USER and _PAGE_PRESENT Switch _PAGE_RW and _PAGE_HASHPTE This allows removing a few instructions. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c4d6c18a7f8d9d3b899bc492f55fbc40ef38896a.1583861325.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/32/hash.h | 8 ++++---- arch/powerpc/kernel/head_32.S | 9 +++------ arch/powerpc/mm/book3s32/hash_low.S | 14 ++++++-------- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h index 2a0a467d2985..34a7215ae81e 100644 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ b/arch/powerpc/include/asm/book3s/32/hash.h @@ -17,9 +17,9 @@ * updating the accessed and modified bits in the page table tree.
*/ -#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ -#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_USER 0x001 /* usermode access allowed */ +#define _PAGE_RW 0x002 /* software: user write access allowed */ +#define _PAGE_PRESENT 0x004 /* software: pte contains a translation */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -27,7 +27,7 @@ #define _PAGE_DIRTY 0x080 /* C: page changed */ #define _PAGE_ACCESSED 0x100 /* R: page referenced */ #define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_HASHPTE 0x400 /* hash_page has made an HPTE for this pte */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ #ifdef CONFIG_PTE_64BIT diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 97c887950c3c..daaa153950c2 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -348,7 +348,7 @@ BEGIN_MMU_FTR_SECTION andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h #endif bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ + rlwinm r3, r5, 32 - 24, 30, 30 /* DSISR_STORE -> _PAGE_RW */ bl hash_page b handle_page_fault_tramp_1 FTR_SECTION_ELSE @@ -497,7 +497,6 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r1, r1, 0xe06 /* clear out reserved bits */ andc r1, r0, r1 /* PP = user? 
1 : 0 */ BEGIN_FTR_SECTION @@ -565,9 +564,8 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + rlwinm r1,r0,0,30,30 /* _PAGE_RW -> PP msb */ + rlwimi r0,r0,1,30,30 /* _PAGE_USER -> PP msb */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION @@ -645,7 +643,6 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ andc r1,r0,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 877d880890fe..6d236080cb1a 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -35,7 +35,7 @@ mmu_hash_lock: /* * Load a PTE into the hash table, if possible. * The address is in r4, and r3 contains an access flag: - * _PAGE_RW (0x400) if a write. + * _PAGE_RW (0x002) if a write. * r9 contains the SRR1 value, from which we use the MSR_PR bit. * SPRG_THREAD contains the physical address of the current task's thread. 
* @@ -69,7 +69,7 @@ _GLOBAL(hash_page) blt+ 112f /* assume user more likely */ lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ - rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ + rlwimi r3,r9,32-14,31,31 /* MSR_PR -> _PAGE_USER */ 112: #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ @@ -94,7 +94,7 @@ _GLOBAL(hash_page) #else rlwimi r8,r4,23,20,28 /* compute pte address */ #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + rlwinm r0,r3,6,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE /* @@ -310,11 +310,9 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ - and r8,r8,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + and r8,r5,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,1,30,30 /* _PAGE_USER -> PP msb */ ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */ BEGIN_FTR_SECTION @@ -566,7 +564,7 @@ _GLOBAL(flush_hash_pages) 33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ - rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ + rlwinm r8,r8,0,~_PAGE_HASHPTE /* clear HASHPTE bit */ stwcx. 
r8,0,r5 /* update the pte */ bne- 33b -- cgit v1.2.3-59-g8ed1b From 9475af081ec1fb6cc794a17ae90f2c01aa8a7993 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Thu, 12 Mar 2020 22:04:12 +0800 Subject: PCI: rpaphp: Remove unused variable 'value' Fixes gcc '-Wunused-but-set-variable' warning: drivers/pci/hotplug/rpaphp_core.c: In function is_php_type: drivers/pci/hotplug/rpaphp_core.c:291:16: warning: variable value set but not used [-Wunused-but-set-variable] Reported-by: Hulk Robot Signed-off-by: Chen Zhou Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200312140412.32373-1-chenzhou10@huawei.com --- drivers/pci/hotplug/rpaphp_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index e408e4021cee..5d871ef231fd 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -288,11 +288,10 @@ EXPORT_SYMBOL_GPL(rpaphp_check_drc_props); static int is_php_type(char *drc_type) { - unsigned long value; char *endptr; /* PCI Hotplug nodes have an integer for drc_type */ - value = simple_strtoul(drc_type, &endptr, 10); + simple_strtoul(drc_type, &endptr, 10); if (endptr == drc_type) return 0; -- cgit v1.2.3-59-g8ed1b From 36b78402d97a3b9aeab136feb9b00d8647ec2c20 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 13 Mar 2020 15:18:42 +0530 Subject: powerpc/hash64/devmap: Use H_PAGE_THP_HUGE when setting up huge devmap PTE entries H_PAGE_THP_HUGE is used to differentiate between a THP hugepage and hugetlb hugepage entries. The difference is WRT how we handle hash fault on these address. THP address enables MPSS in segments. We want to manage devmap hugepage entries similar to THP pt entries. Hence use H_PAGE_THP_HUGE for devmap huge PTE entries. With current code while handling hash PTE fault, we do set is_thp = true when finding devmap PTE huge PTE entries. 
Current code also does the below sequence when setting up huge devmap entries. entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); In that case we would find both H_PAGE_THP_HUGE and _PAGE_DEVMAP set for huge devmap PTE entries. This results in a false positive error like the one below. kernel BUG at /home/kvaneesh/src/linux/mm/memory.c:4321! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 56 PID: 67996 Comm: t_mmap_dio Not tainted 5.6.0-rc4-59640-g371c804dedbc #128 .... NIP [c00000000044c9e4] __follow_pte_pmd+0x264/0x900 LR [c0000000005d45f8] dax_writeback_one+0x1a8/0x740 Call Trace: str_spec.74809+0x22ffb4/0x2d116c (unreliable) dax_writeback_one+0x1a8/0x740 dax_writeback_mapping_range+0x26c/0x700 ext4_dax_writepages+0x150/0x5a0 do_writepages+0x68/0x180 __filemap_fdatawrite_range+0x138/0x180 file_write_and_wait_range+0xa4/0x110 ext4_sync_file+0x370/0x6e0 vfs_fsync_range+0x70/0xf0 sys_msync+0x220/0x2e0 system_call+0x5c/0x68 This is because our pmd_trans_huge check doesn't exclude _PAGE_DEVMAP. To make this all consistent, update pmd_mkdevmap to set H_PAGE_THP_HUGE and pmd_trans_huge check now excludes _PAGE_DEVMAP correctly.
Fixes: ebd31197931d ("powerpc/mm: Add devmap support for ppc64") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200313094842.351830-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 6 ++++++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 8 +++++++- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +++- arch/powerpc/include/asm/book3s/64/radix.h | 5 +++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 8fd8599c9395..3f9ae3585ab9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -156,6 +156,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int hash__has_transparent_hugepage(void); #endif +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + BUG(); + return pmd; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index d1d9177d9ebd..0729c034e56f 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -246,7 +246,7 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, */ static inline int hash__pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) == (_PAGE_PTE | H_PAGE_THP_HUGE)); } @@ -272,6 +272,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)); +} + #endif /* 
__ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 201a69e6a355..368b136517e0 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1303,7 +1303,9 @@ extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); + if (radix_enabled()) + return radix__pmd_mkdevmap(pmd); + return hash__pmd_mkdevmap(pmd); } static inline int pmd_devmap(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index d97db3ad9aae..a1c60d5b50af 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -263,6 +263,11 @@ static inline int radix__has_transparent_hugepage(void) } #endif +static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); +} + extern int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); -- cgit v1.2.3-59-g8ed1b From 850507f30c38dff21ed557cb98ab16db26c32bbc Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Wed, 18 Mar 2020 14:00:04 +0800 Subject: selftests/powerpc: Turn off timeout setting for benchmarks, dscr, signal, tm Some specific tests in powerpc can take longer to run than the default 45 seconds that was added in commit 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test"). The following test results were collected across 2 Power8 nodes and 1 Power9 node in our pool: powerpc/benchmarks/futex_bench - 52s powerpc/dscr/dscr_sysfs_test - 116s powerpc/signal/signal_fuzzer - 88s powerpc/tm/tm_unavailable_test - 168s powerpc/tm/tm-poison - 240s Thus they will fail with TIMEOUT error.
Disable the timeout setting for these sub-tests to allow them to finish properly. https://bugs.launchpad.net/bugs/1864642 Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") Signed-off-by: Po-Hsu Lin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200318060004.10685-1-po-hsu.lin@canonical.com --- tools/testing/selftests/powerpc/benchmarks/Makefile | 2 ++ tools/testing/selftests/powerpc/benchmarks/settings | 1 + tools/testing/selftests/powerpc/dscr/Makefile | 2 ++ tools/testing/selftests/powerpc/dscr/settings | 1 + tools/testing/selftests/powerpc/signal/Makefile | 2 ++ tools/testing/selftests/powerpc/signal/settings | 1 + tools/testing/selftests/powerpc/tm/Makefile | 2 ++ tools/testing/selftests/powerpc/tm/settings | 1 + 8 files changed, 12 insertions(+) create mode 100644 tools/testing/selftests/powerpc/benchmarks/settings create mode 100644 tools/testing/selftests/powerpc/dscr/settings create mode 100644 tools/testing/selftests/powerpc/signal/settings create mode 100644 tools/testing/selftests/powerpc/tm/settings diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index d40300a65b42..a32a6ab89914 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -2,6 +2,8 @@ TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall TEST_GEN_FILES := exec_target +TEST_FILES := settings + CFLAGS += -O2 top_srcdir = ../../../../..
diff --git a/tools/testing/selftests/powerpc/benchmarks/settings b/tools/testing/selftests/powerpc/benchmarks/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 5df476364b4d..cfa6eedcb66c 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -3,6 +3,8 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ dscr_sysfs_thread_test +TEST_FILES := settings + top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index 63b57583e07d..932a032bf036 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -5,6 +5,8 @@ CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64 +TEST_FILES := settings + top_srcdir = ../../../../.. 
include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/signal/settings b/tools/testing/selftests/powerpc/signal/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index b1d99736f8b8..0b0db8d3857c 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -7,6 +7,8 @@ TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ tm-signal-context-force-tm tm-poison tm-signal-pagefault +TEST_FILES := settings + top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/settings b/tools/testing/selftests/powerpc/tm/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/settings @@ -0,0 +1 @@ +timeout=0 -- cgit v1.2.3-59-g8ed1b From d95fe371ecd28901f11256c610b988ed44e36ee2 Mon Sep 17 00:00:00 2001 From: Pratik Rajesh Sampat Date: Mon, 16 Mar 2020 19:27:43 +0530 Subject: cpufreq: powernv: Fix frame-size-overflow in powernv_cpufreq_work_fn The patch avoids allocating cpufreq_policy on stack hence fixing frame size overflow in 'powernv_cpufreq_work_fn' Fixes: 227942809b52 ("cpufreq: powernv: Restore cpu frequency to policy->cur on unthrottling") Signed-off-by: Pratik Rajesh Sampat Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200316135743.57735-1-psampat@linux.ibm.com --- drivers/cpufreq/powernv-cpufreq.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 03798c4326c6..8646eb197cd9 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -902,6 
+902,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); + struct cpufreq_policy *policy; unsigned int cpu; cpumask_t mask; @@ -916,12 +917,14 @@ void powernv_cpufreq_work_fn(struct work_struct *work) chip->restore = false; for_each_cpu(cpu, &mask) { int index; - struct cpufreq_policy policy; - cpufreq_get_policy(&policy, cpu); - index = cpufreq_table_find_index_c(&policy, policy.cur); - powernv_cpufreq_target_index(&policy, index); - cpumask_andnot(&mask, &mask, policy.cpus); + policy = cpufreq_cpu_get(cpu); + if (!policy) + continue; + index = cpufreq_table_find_index_c(policy, policy->cur); + powernv_cpufreq_target_index(policy, index); + cpumask_andnot(&mask, &mask, policy->cpus); + cpufreq_cpu_put(policy); } out: put_online_cpus(); -- cgit v1.2.3-59-g8ed1b From d4a8e98621543d5798421eed177978bf2b3cdd11 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 20 Mar 2020 14:21:15 +1100 Subject: powerpc/64: Setup a paca before parsing device tree etc. Currently we set up the paca after parsing the device tree for CPU features. Prior to that, r13 contains random data, which means there is random data in r13 while we're running the generic dt parsing code. This random data varies depending on whether we boot through a vmlinux or a zImage: for the vmlinux case it's usually around zero, but for zImages we see random values like 912a72603d420015. This is poor practice, and can also lead to difficult-to-debug crashes. For example, when kcov is enabled, the kcov instrumentation attempts to read preempt_count out of the current task, which goes via the paca. This then crashes in the zImage case. Similarly stack protector can cause crashes if r13 is bogus, by reading from the stack canary in the paca. To resolve this: - move the paca setup to before the CPU feature parsing. 
- because we no longer have access to CPU feature flags in paca setup, change the HV feature test in the paca setup path to consider the actual value of the MSR rather than the CPU feature. Translations get switched on once we leave early_setup, so I think we'd already catch any other cases where the paca or task aren't set up. Boot tested on a P9 guest and host. Fixes: fb0b0a73b223 ("powerpc: Enable kcov") Fixes: 06ec27aea9fc ("powerpc/64: add stack protector support") Cc: stable@vger.kernel.org # v4.20+ Reviewed-by: Andrew Donnellan Suggested-by: Michael Ellerman Signed-off-by: Daniel Axtens [mpe: Reword comments & change log a bit to mention stack protector] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200320032116.1024773-1-mpe@ellerman.id.au --- arch/powerpc/kernel/dt_cpu_ftrs.c | 1 - arch/powerpc/kernel/paca.c | 10 +++++++--- arch/powerpc/kernel/setup_64.c | 30 ++++++++++++++++++++++++------ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 182b4047c1ef..36bc0d5c4f3a 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void) /* Initialize the base environment -- clear FSCR/HFSCR. 
*/ hv_mode = !!(mfmsr() & MSR_HV); if (hv_mode) { - /* CPU_FTR_HVMODE is used early in PACA setup */ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; mtspr(SPRN_HFSCR, 0); } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 949eceb254d8..0ee6308541b1 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -214,11 +214,15 @@ void setup_paca(struct paca_struct *new_paca) /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); #else - /* In HV mode, we setup both HPACA and PACA to avoid problems + /* + * In HV mode, we setup both HPACA and PACA to avoid problems * if we do a GET_PACA() before the feature fixups have been - * applied + * applied. + * + * Normally you should test against CPU_FTR_HVMODE, but CPU features + * are not yet set up when we first reach here. */ - if (early_cpu_has_feature(CPU_FTR_HVMODE)) + if (mfmsr() & MSR_HV) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e05e6dd67ae6..17886d147dd0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -285,18 +285,36 @@ void __init early_setup(unsigned long dt_ptr) /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Try new device tree based feature discovery ... */ - if (!dt_cpu_ftrs_init(__va(dt_ptr))) - /* Otherwise use the old style CPU table */ - identify_cpu(0, mfspr(SPRN_PVR)); - - /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ + /* + * Assume we're on cpu 0 for now. + * + * We need to load a PACA very early for a few reasons. + * + * The stack protector canary is stored in the paca, so as soon as we + * call any stack protected code we need r13 pointing somewhere valid. + * + * If we are using kcov it will call in_task() in its instrumentation, + * which relies on the current task from the PACA. 
+ * + * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as + * printk(), which can trigger both stack protector and kcov. + * + * percpu variables and spin locks also use the paca. + * + * So set up a temporary paca. It will be replaced below once we know + * what CPU we are on. + */ initialise_paca(&boot_paca, 0); setup_paca(&boot_paca); fixup_boot_paca(); /* -------- printk is now safe to use ------- */ + /* Try new device tree based feature discovery ... */ + if (!dt_cpu_ftrs_init(__va(dt_ptr))) + /* Otherwise use the old style CPU table */ + identify_cpu(0, mfspr(SPRN_PVR)); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); -- cgit v1.2.3-59-g8ed1b From 7053f80d96967d8e72e9f2a724bbfc3906ce2b07 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 20 Mar 2020 14:21:16 +1100 Subject: powerpc/64: Prevent stack protection in early boot The previous commit reduced the amount of code that is run before we setup a paca. However there are still a few remaining functions that run with no paca, or worse, with an arbitrary value in r13 that will be used as a paca pointer. In particular the stack protector canary is stored in the paca, so if stack protector is activated for any of these functions we will read the stack canary from wherever r13 points. If r13 happens to point outside of memory we will get a machine check / checkstop. 
For example if we modify initialise_paca() to trigger stack protection, and then boot in the mambo simulator with r13 poisoned in skiboot before calling the kernel: DEBUG: 19952232: (19952232): INSTRUCTION: PC=0xC0000000191FC1E8: [0x3C4C006D]: addis r2,r12,0x6D [fetch] DEBUG: 19952236: (19952236): INSTRUCTION: PC=0xC00000001807EAD8: [0x7D8802A6]: mflr r12 [fetch] FATAL ERROR: 19952276: (19952276): Check Stop for 0:0: Machine Check with ME bit of MSR off DEBUG: 19952276: (19952276): INSTRUCTION: PC=0xC0000000191FCA7C: [0xE90D0CF8]: ld r8,0xCF8(r13) [Instruction Failed] INFO: 19952276: (19952277): ** Execution stopped: Mambo Error, Machine Check Stop, ** systemsim % bt pc: 0xC0000000191FCA7C initialise_paca+0x54 lr: 0xC0000000191FC22C early_setup+0x44 stack:0x00000000198CBED0 0x0 +0x0 stack:0x00000000198CBF00 0xC0000000191FC22C early_setup+0x44 stack:0x00000000198CBF90 0x1801C968 +0x1801C968 So annotate the relevant functions to ensure stack protection is never enabled for them. Fixes: 06ec27aea9fc ("powerpc/64: add stack protector support") Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200320032116.1024773-2-mpe@ellerman.id.au --- arch/powerpc/kernel/paca.c | 4 ++-- arch/powerpc/kernel/setup.h | 6 ++++++ arch/powerpc/kernel/setup_64.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0ee6308541b1..3f91ccaa9c74 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -176,7 +176,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init initialise_paca(struct paca_struct *new_paca, int cpu) +void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; @@ -205,7 +205,7 @@ void __init initialise_paca(struct paca_struct 
*new_paca, int cpu) } /* Put the paca pointer into r13 and SPRG_PACA */ -void setup_paca(struct paca_struct *new_paca) +void __nostackprotector setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2dd0d9cb5a20..2ec835574cc9 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,6 +8,12 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H +#ifdef CONFIG_CC_IS_CLANG +#define __nostackprotector +#else +#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) +#endif + void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 17886d147dd0..438a9befce41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -279,7 +279,7 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. */ -void __init early_setup(unsigned long dt_ptr) +void __init __nostackprotector early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; -- cgit v1.2.3-59-g8ed1b From 8645aaa87963439007773ed8862ae6a29ea15eae Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Mar 2020 18:38:59 +1100 Subject: powerpc/eeh: Add sysfs files in late probe Move creating the EEH specific sysfs files into eeh_add_device_late() rather than being open-coded all over the place. Calling the function is generally done immediately after calling eeh_add_device_late() anyway. This is also a correctness fix since currently the sysfs files will be added even if the EEH probe happens to fail. Similarly, on pseries we currently add the sysfs files before calling eeh_add_device_late(). This is flat-out broken since the sysfs files require the pci_dev->dev.archdata.edev pointer to be set, and that is done in eeh_add_device_late(). 
Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306073904.4737-1-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 3 --- arch/powerpc/kernel/eeh.c | 24 +----------------------- arch/powerpc/kernel/of_platform.c | 3 --- arch/powerpc/kernel/pci-common.c | 3 --- arch/powerpc/platforms/powernv/eeh-powernv.c | 1 - arch/powerpc/platforms/pseries/eeh_pseries.c | 3 +-- 6 files changed, 2 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 6f9b2a12540a..5a349079057d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -305,7 +305,6 @@ void eeh_add_device_early(struct pci_dn *); void eeh_add_device_tree_early(struct pci_dn *); void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); -void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); @@ -368,8 +367,6 @@ static inline void eeh_add_device_late(struct pci_dev *dev) { } static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } -static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } - static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 17cb3e9b5697..087891214739 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1210,6 +1210,7 @@ void eeh_add_device_late(struct pci_dev *dev) dev->dev.archdata.edev = edev; eeh_addr_cache_insert_dev(dev); + eeh_sysfs_add_device(dev); } /** @@ -1237,29 +1238,6 @@ void eeh_add_device_tree_late(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); -/** - * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to add EEH sysfs 
files for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_sysfs_files(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_sysfs_add_device(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_sysfs_files(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); - /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 427fc22f72b6..cb6880092dd7 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -86,9 +86,6 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Add probed PCI devices to the device model */ pci_bus_add_devices(phb->bus); - /* sysfs files should only be added after devices are added */ - eeh_add_sysfs_files(phb->bus); - return 0; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c6c03416a151..3d2b1cf30b80 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1404,9 +1404,6 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Add new devices to global lists. Register in proc, sysfs. 
*/ pci_bus_add_devices(bus); - - /* sysfs files should only be added after devices are added */ - eeh_add_sysfs_files(bus); } EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 6f300ab7f0e9..ef727ecd99cd 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -48,7 +48,6 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev) dev_dbg(&pdev->dev, "EEH: Setting up device\n"); eeh_add_device_early(pdn); eeh_add_device_late(pdev); - eeh_sysfs_add_device(pdev); } static int pnv_eeh_init(void) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 893ba3f562c4..95bbf9102584 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -68,7 +68,6 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) } #endif eeh_add_device_early(pdn); - eeh_add_device_late(pdev); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -78,7 +77,7 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) eeh_add_to_parent_pe(edev); /* Add as VF PE type */ } #endif - eeh_sysfs_add_device(pdev); + eeh_add_device_late(pdev); } /* -- cgit v1.2.3-59-g8ed1b From 2d0953f7d5acc53a97d0dbfab8b1827e9897a7e6 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Mar 2020 18:39:00 +1100 Subject: powerpc/eeh: Remove eeh_add_device_tree_late() On pseries and PowerNV pcibios_bus_add_device() calls eeh_add_device_late() so there's no need to do a separate tree traversal to bind the eeh_dev and pci_dev together setting up the PHB at boot. As a result we can remove eeh_add_device_tree_late(). 
Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306073904.4737-2-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 3 --- arch/powerpc/kernel/eeh.c | 25 ------------------------- arch/powerpc/kernel/of_platform.c | 3 --- arch/powerpc/kernel/pci-common.c | 3 --- 4 files changed, 34 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5a349079057d..5d1078166417 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -304,7 +304,6 @@ void eeh_addr_cache_init(void); void eeh_add_device_early(struct pci_dn *); void eeh_add_device_tree_early(struct pci_dn *); void eeh_add_device_late(struct pci_dev *); -void eeh_add_device_tree_late(struct pci_bus *); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); @@ -365,8 +364,6 @@ static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { } static inline void eeh_add_device_late(struct pci_dev *dev) { } -static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } - static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 087891214739..9cb33706ef80 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1213,31 +1213,6 @@ void eeh_add_device_late(struct pci_dev *dev) eeh_sysfs_add_device(dev); } -/** - * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to perform EEH initialization for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. 
- */ -void eeh_add_device_tree_late(struct pci_bus *bus) -{ - struct pci_dev *dev; - - if (eeh_has_flag(EEH_FORCE_DISABLED)) - return; - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_add_device_late(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_device_tree_late(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); - /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index cb6880092dd7..64edac81c633 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -80,9 +80,6 @@ static int of_pci_phb_probe(struct platform_device *dev) */ pcibios_claim_one_bus(phb->bus); - /* Finish EEH setup */ - eeh_add_device_tree_late(phb->bus); - /* Add probed PCI devices to the device model */ pci_bus_add_devices(phb->bus); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 3d2b1cf30b80..8983afa6d62a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1399,9 +1399,6 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) pci_assign_unassigned_bus_resources(bus); } - /* Fixup EEH */ - eeh_add_device_tree_late(bus); - /* Add new devices to global lists. Register in proc, sysfs. */ pci_bus_add_devices(bus); } -- cgit v1.2.3-59-g8ed1b From a4b4f61db8ccbd2744fc00bed3bdd912278f4595 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Mar 2020 18:39:01 +1100 Subject: powerpc/eeh: Do early EEH init only when required The pci hotplug helper (pci_hp_add_devices()) calls eeh_add_device_tree_early() to scan the device-tree for new PCI devices and do the early EEH probe before the device is scanned. 
This early probe is a no-op in a lot of cases because: a) The early init is only required to satisfy a PAPR requirement that EEH be configured before we start doing config accesses. On PowerNV it is a no-op. b) It's a no-op for devices that have already had their eeh_dev initialised. There are four callers of pci_hp_add_devices(): 1. arch/powerpc/kernel/eeh_driver.c Here the hotplug helper is called when re-scanning pci_devs that were removed during an EEH recovery pass. The EEH state for each removed device (the eeh_dev) is retained across a recovery pass so the early init is a no-op in this case. 2. drivers/pci/hotplug/pnv_php.c This is also a no-op since the PowerNV hotplug driver is, surprisingly, PowerNV specific. 3. drivers/pci/hotplug/rpaphp_core.c 4. drivers/pci/hotplug/rpaphp_pci.c In these two cases new devices have been hotplugged and FW has provided new DT nodes for each. These are the only two cases where we might have new PCI device nodes in the DT so these are the only two cases where the early EEH probe needs to be done. We can move the calls to eeh_add_device_tree_early() to the locations where it's needed and remove it from the generic path. This is preparation for making the early EEH probe pseries specific. 
Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306073904.4737-3-oohall@gmail.com --- arch/powerpc/kernel/pci-hotplug.c | 2 -- drivers/pci/hotplug/rpaphp_core.c | 2 ++ drivers/pci/hotplug/rpaphp_pci.c | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index d6a67f814983..bf83f76563a3 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -112,8 +112,6 @@ void pci_hp_add_devices(struct pci_bus *bus) struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); - eeh_add_device_tree_early(PCI_DN(dn)); - phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 5d871ef231fd..392a936c17de 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -493,6 +493,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) return retval; if (state == PRESENT) { + eeh_add_device_tree_early(PCI_DN(slot->dn)); + pci_lock_rescan_remove(); pci_hp_add_devices(slot->bus); pci_unlock_rescan_remove(); diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index beca61badeea..61ebbd832afb 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -95,8 +95,10 @@ int rpaphp_enable_slot(struct slot *slot) return -EINVAL; } - if (list_empty(&bus->devices)) + if (list_empty(&bus->devices)) { + eeh_add_device_tree_early(PCI_DN(slot->dn)); pci_hp_add_devices(bus); + } if (!list_empty(&bus->devices)) { slot->state = CONFIGURED; -- cgit v1.2.3-59-g8ed1b From 3ff32efb62b624e4c07d002ffb294d209689bedd Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Mar 2020 18:39:02 +1100 Subject: powerpc/eeh: Remove PHB check in probe This check for a missing PHB has existed in various forms since the initial
PPC64 port was upstreamed in 2002. The idea seems to be that we need to guard against creating pci-specific data structures for the non-pci children of a PCI device tree node (e.g. USB devices). However, we only create pci_dn structures for DT nodes that correspond to PCI devices so there's not much point in doing this check in the eeh_probe path. Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306073904.4737-4-oohall@gmail.com --- arch/powerpc/kernel/eeh.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9cb33706ef80..a9e4ca7b5e09 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1120,7 +1120,6 @@ core_initcall_sync(eeh_init); */ void eeh_add_device_early(struct pci_dn *pdn) { - struct pci_controller *phb = pdn ? pdn->phb : NULL; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); if (!edev) @@ -1129,11 +1128,6 @@ void eeh_add_device_early(struct pci_dn *pdn) if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) return; - /* USB Bus children of PCI devices will not have BUID's */ - if (NULL == phb || - (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) - return; - eeh_ops->probe(pdn, NULL); } -- cgit v1.2.3-59-g8ed1b From b6eebb093cad0feb56c717611ee0d2d7c66b4ec7 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Mar 2020 18:39:03 +1100 Subject: powerpc/eeh: Make early EEH init pseries specific The eeh_ops->probe() function is called from two different contexts: 1. On pseries, where we set EEH_PROBE_MODE_DEVTREE, it's called in eeh_add_device_early() which is supposed to run before we create a pci_dev. 2. On PowerNV, where we set EEH_PROBE_MODE_DEV, it's called in eeh_device_add_late() which is supposed to run *after* the pci_dev is created. 
The "early" probe is required because PAPR requires that we perform an RTAS call to enable EEH support on a device before we start interacting with it via config space or MMIO. This requirement doesn't exist on PowerNV and shoehorning two completely separate initialisation paths into a common interface just results in convoluted code everywhere. Additionally the early probe requires the probe function to take a pci_dn rather than a pci_dev argument. We'd like to make pci_dn a pseries specific data structure since there's no real requirement for them on PowerNV. To help both goals move the early probe into the pseries containment zone so the platform dependence is more explicit. Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306073904.4737-5-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 14 +++--- arch/powerpc/kernel/eeh.c | 46 -------------------- arch/powerpc/kernel/of_platform.c | 6 +-- arch/powerpc/platforms/powernv/eeh-powernv.c | 6 --- arch/powerpc/platforms/pseries/eeh_pseries.c | 65 ++++++++++++++++++++++------ arch/powerpc/platforms/pseries/pci_dlpar.c | 2 +- drivers/pci/hotplug/rpadlpar_core.c | 2 +- drivers/pci/hotplug/rpaphp_core.c | 2 +- drivers/pci/hotplug/rpaphp_pci.c | 2 +- 9 files changed, 64 insertions(+), 81 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5d1078166417..8580238e4852 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -301,8 +301,6 @@ int __exit eeh_ops_unregister(const char *name); int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_init(void); -void eeh_add_device_early(struct pci_dn *); -void eeh_add_device_tree_early(struct pci_dn *); void eeh_add_device_late(struct pci_dev *); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); @@ -358,10 +356,6 @@ static 
inline int eeh_check_failure(const volatile void __iomem *token) static inline void eeh_addr_cache_init(void) { } -static inline void eeh_add_device_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { } - static inline void eeh_add_device_late(struct pci_dev *dev) { } static inline void eeh_remove_device(struct pci_dev *dev) { } @@ -370,6 +364,14 @@ static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_IO_ERROR_VALUE(size) (-1UL) #endif /* CONFIG_EEH */ +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH) +void pseries_eeh_init_edev(struct pci_dn *pdn); +void pseries_eeh_init_edev_recursive(struct pci_dn *pdn); +#else +static inline void pseries_eeh_add_device_early(struct pci_dn *pdn) { } +static inline void pseries_eeh_add_device_tree_early(struct pci_dn *pdn) { } +#endif + #ifdef CONFIG_PPC64 /* * MMIO read/write operations with EEH support. diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index a9e4ca7b5e09..55d3ef6e5b9c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1106,52 +1106,6 @@ static int eeh_init(void) core_initcall_sync(eeh_init); -/** - * eeh_add_device_early - Enable EEH for the indicated device node - * @pdn: PCI device node for which to set up EEH - * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. 
- */ -void eeh_add_device_early(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - - if (!edev) - return; - - if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) - return; - - eeh_ops->probe(pdn, NULL); -} - -/** - * eeh_add_device_tree_early - Enable EEH for the indicated device - * @pdn: PCI device node - * - * This routine must be used to perform EEH initialization for the - * indicated PCI device that was added after system boot (e.g. - * hotplug, dlpar). - */ -void eeh_add_device_tree_early(struct pci_dn *pdn) -{ - struct pci_dn *n; - - if (!pdn) - return; - - list_for_each_entry(n, &pdn->child_list, list) - eeh_add_device_tree_early(n); - eeh_add_device_early(pdn); -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); - /** * eeh_add_device_late - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 64edac81c633..71a3f97dc988 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -62,13 +62,9 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); - /* Create EEH devices for the PHB */ + /* Create EEH PEs for the PHB */ eeh_dev_phb_init_dynamic(phb); - /* Register devices with EEH */ - if (dev->dev.of_node->child) - eeh_add_device_tree_early(PCI_DN(dev->dev.of_node)); - /* Scan the bus */ pcibios_scan_phb(phb); if (phb->bus == NULL) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ef727ecd99cd..eaa8dfefa124 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -40,13 +40,7 @@ static int eeh_event_irq = -EINVAL; void pnv_pcibios_bus_add_device(struct pci_dev *pdev) { - struct pci_dn *pdn = pci_get_pdn(pdev); - - if (!pdn || eeh_has_flag(EEH_FORCE_DISABLED)) - return; - dev_dbg(&pdev->dev, "EEH: 
Setting up device\n"); - eeh_add_device_early(pdn); eeh_add_device_late(pdev); } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 95bbf9102584..1ca7cf0fc0d8 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -67,7 +67,7 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; } #endif - eeh_add_device_early(pdn); + pseries_eeh_init_edev(pdn); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -221,15 +221,16 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) } /** - * pseries_eeh_probe - EEH probe on the given device + * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn + * * @pdn: PCI device node - * @data: Unused * - * When EEH module is installed during system boot, all PCI devices - * are checked one by one to see if it supports EEH. The function - * is introduced for the purpose. + * When we discover a new PCI device via the device-tree we create a + * corresponding pci_dn and we allocate, but don't initialise, an eeh_dev. + * This function takes care of the initialisation and inserts the eeh_dev + * into the correct eeh_pe. If no eeh_pe exists we'll allocate one. */ -static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) +void pseries_eeh_init_edev(struct pci_dn *pdn) { struct eeh_dev *edev; struct eeh_pe pe; @@ -237,18 +238,35 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) int enable = 0; int ret; - /* Retrieve OF node and eeh device */ + if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))) + return; + + /* + * Find the eeh_dev for this pdn. The storage for the eeh_dev was + * allocated at the same time as the pci_dn. + * + * XXX: We should probably re-visit that. 
+ */ edev = pdn_to_eeh_dev(pdn); - if (!edev || edev->pe) - return NULL; + if (!edev) + return; + + /* + * If ->pe is set then we've already probed this device. We hit + * this path when a pci_dev is removed and rescanned while recovering + * a PE (i.e. for devices where the driver doesn't support error + * recovery). + */ + if (edev->pe) + return; /* Check class/vendor/device IDs */ if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code) - return NULL; + return; /* Skip for PCI-ISA bridge */ if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) - return NULL; + return; eeh_edev_dbg(edev, "Probing device\n"); @@ -315,9 +333,29 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) /* Save memory bars */ eeh_save_bars(edev); +} + +/** + * pseries_eeh_init_edev_recursive - Enable EEH for the indicated device + * @pdn: PCI device node + * + * This routine must be used to perform EEH initialization for the + * indicated PCI device that was added after system boot (e.g. + * hotplug, dlpar). 
+ */ +void pseries_eeh_init_edev_recursive(struct pci_dn *pdn) +{ + struct pci_dn *n; + + if (!pdn) + return; + + list_for_each_entry(n, &pdn->child_list, list) + pseries_eeh_init_edev_recursive(n); - return NULL; + pseries_eeh_init_edev(pdn); } +EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive); /** * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable @@ -775,7 +813,6 @@ static int pseries_notify_resume(struct pci_dn *pdn) static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, - .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 361986e4354e..b3a38f5a6b68 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -37,7 +37,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) eeh_dev_phb_init_dynamic(phb); if (dn->child) - eeh_add_device_tree_early(PCI_DN(dn)); + pseries_eeh_init_edev_recursive(PCI_DN(dn)); pcibios_scan_phb(phb); pcibios_finish_adding_to_bus(phb->bus); diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 977946e4e613..c5eb509c72f0 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -140,7 +140,7 @@ static void dlpar_pci_add_bus(struct device_node *dn) struct pci_controller *phb = pdn->phb; struct pci_dev *dev = NULL; - eeh_add_device_tree_early(pdn); + pseries_eeh_init_edev_recursive(pdn); /* Add EADS device to PHB bus, adding new entry to bus->devices */ dev = of_create_pci_dev(dn, phb->bus, pdn->devfn); diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 392a936c17de..6504869efabc 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -493,7 +493,7 @@ static int enable_slot(struct hotplug_slot 
*hotplug_slot) return retval; if (state == PRESENT) { - eeh_add_device_tree_early(PCI_DN(slot->dn)); + pseries_eeh_init_edev_recursive(PCI_DN(slot->dn)); pci_lock_rescan_remove(); pci_hp_add_devices(slot->bus); diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index 61ebbd832afb..c380bdacd146 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -96,7 +96,7 @@ int rpaphp_enable_slot(struct slot *slot) } if (list_empty(&bus->devices)) { - eeh_add_device_tree_early(PCI_DN(slot->dn)); + pseries_eeh_init_edev_recursive(PCI_DN(slot->dn)); pci_hp_add_devices(bus); } -- cgit v1.2.3-59-g8ed1b From e86350f70a02e5b4e26b0eccedb575a7490bc834 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Mar 2020 18:39:04 +1100 Subject: powerpc/eeh: Rework eeh_ops->probe() With the EEH early probe now being pseries specific there's no need for eeh_ops->probe() to take a pci_dn. Instead, we can make it take a pci_dev and use the probe function to map a pci_dev to an eeh_dev. This allows the platform to implement its own method for finding (or creating) an eeh_dev for a given pci_dev which also removes a use of pci_dn in generic EEH code. This patch also renames eeh_add_device_late() to eeh_probe_device(). This better reflects what it does and removes the last vestiges of the early/late EEH probe split. 
Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306073904.4737-6-oohall@gmail.com --- arch/powerpc/include/asm/eeh.h | 6 ++-- arch/powerpc/kernel/eeh.c | 44 +++++++++++++++------------- arch/powerpc/platforms/powernv/eeh-powernv.c | 30 +++++++++---------- arch/powerpc/platforms/pseries/eeh_pseries.c | 23 ++++++++++++++- 4 files changed, 62 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 8580238e4852..964a54292b36 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -215,7 +215,7 @@ enum { struct eeh_ops { char *name; int (*init)(void); - void* (*probe)(struct pci_dn *pdn, void *data); + struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *delay); @@ -301,7 +301,7 @@ int __exit eeh_ops_unregister(const char *name); int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_init(void); -void eeh_add_device_late(struct pci_dev *); +void eeh_probe_device(struct pci_dev *pdev); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); @@ -356,7 +356,7 @@ static inline int eeh_check_failure(const volatile void __iomem *token) static inline void eeh_addr_cache_init(void) { } -static inline void eeh_add_device_late(struct pci_dev *dev) { } +static inline void eeh_probe_device(struct pci_dev *dev) { } static inline void eeh_remove_device(struct pci_dev *dev) { } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 55d3ef6e5b9c..7cdcb413bb44 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1107,35 +1107,43 @@ static int eeh_init(void) core_initcall_sync(eeh_init); /** - * eeh_add_device_late - 
Perform EEH initialization for the indicated pci device + * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH * * This routine must be used to complete EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). */ -void eeh_add_device_late(struct pci_dev *dev) +void eeh_probe_device(struct pci_dev *dev) { - struct pci_dn *pdn; struct eeh_dev *edev; - if (!dev) + pr_debug("EEH: Adding device %s\n", pci_name(dev)); + + /* + * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was + * already called for this device. + */ + if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) { + pci_dbg(dev, "Already bound to an eeh_dev!\n"); return; + } - pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); - edev = pdn_to_eeh_dev(pdn); - eeh_edev_dbg(edev, "Adding device\n"); - if (edev->pdev == dev) { - eeh_edev_dbg(edev, "Device already referenced!\n"); + edev = eeh_ops->probe(dev); + if (!edev) { + pr_debug("EEH: Adding device failed\n"); return; } /* - * The EEH cache might not be removed correctly because of - * unbalanced kref to the device during unplug time, which - * relies on pcibios_release_device(). So we have to remove - * that here explicitly. + * FIXME: We rely on pcibios_release_device() to remove the + * existing EEH state. The release function is only called if + * the pci_dev's refcount drops to zero so if something is + * keeping a ref to a device (e.g. a filesystem) we need to + * remove the old EEH state. + * + * FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ - if (edev->pdev) { + if (edev->pdev && edev->pdev != dev) { eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); @@ -1146,17 +1154,11 @@ void eeh_add_device_late(struct pci_dev *dev) * into error handler afterwards. 
*/ edev->mode |= EEH_DEV_NO_HANDLER; - - edev->pdev = NULL; - dev->dev.archdata.edev = NULL; } - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->probe(pdn, NULL); - + /* bind the pdev and the edev together */ edev->pdev = dev; dev->dev.archdata.edev = edev; - eeh_addr_cache_insert_dev(dev); eeh_sysfs_add_device(dev); } diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index eaa8dfefa124..79409e005fcd 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -41,7 +41,7 @@ static int eeh_event_irq = -EINVAL; void pnv_pcibios_bus_add_device(struct pci_dev *pdev) { dev_dbg(&pdev->dev, "EEH: Setting up device\n"); - eeh_add_device_late(pdev); + eeh_probe_device(pdev); } static int pnv_eeh_init(void) @@ -340,23 +340,13 @@ static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) /** * pnv_eeh_probe - Do probe on PCI device - * @pdn: PCI device node - * @data: unused + * @pdev: pci_dev to probe * - * When EEH module is installed during system boot, all PCI devices - * are checked one by one to see if it supports EEH. The function - * is introduced for the purpose. By default, EEH has been enabled - * on all PCI devices. That's to say, we only need do necessary - * initialization on the corresponding eeh device and create PE - * accordingly. - * - * It's notable that's unsafe to retrieve the EEH device through - * the corresponding PCI device. During the PCI device hotplug, which - * was possiblly triggered by EEH core, the binding between EEH device - * and the PCI device isn't built yet. + * Create, or find the existing, eeh_dev for this pci_dev. 
*/ -static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) +static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); struct pci_controller *hose = pdn->phb; struct pnv_phb *phb = hose->private_data; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -373,6 +363,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) if (!edev || edev->pe) return NULL; + /* already configured? */ + if (edev->pdev) { + pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n", + __func__, hose->global_number, config_addr >> 8, + PCI_SLOT(config_addr), PCI_FUNC(config_addr)); + return edev; + } + /* Skip for PCI-ISA bridge */ if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) return NULL; @@ -464,7 +462,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) eeh_edev_dbg(edev, "EEH enabled on device\n"); - return NULL; + return edev; } /** diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 1ca7cf0fc0d8..845342814edc 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -77,7 +77,7 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) eeh_add_to_parent_pe(edev); /* Add as VF PE type */ } #endif - eeh_add_device_late(pdev); + eeh_probe_device(pdev); } /* @@ -335,6 +335,26 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) eeh_save_bars(edev); } +static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + + pdn = pci_get_pdn_by_devfn(pdev->bus, pdev->devfn); + if (!pdn) + return NULL; + + /* + * If the system supports EEH on this device then the eeh_dev was + * configured and inserted into a PE in pseries_eeh_init_edev() + */ + edev = pdn_to_eeh_dev(pdn); + if (!edev || !edev->pe) + return NULL; + + return edev; +} + /** * pseries_eeh_init_edev_recursive - Enable EEH for the indicated device * @pdn: PCI device node @@ -813,6 +833,7 @@ static int 
pseries_notify_resume(struct pci_dn *pdn) static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, + .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, -- cgit v1.2.3-59-g8ed1b From adde8715cf0571878d37fcb20595aad57b923bab Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 21 Mar 2020 01:24:36 +1000 Subject: powerpc/pseries: Avoid harmless preempt warning Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200320152436.1468651-1-npiggin@gmail.com --- arch/powerpc/platforms/pseries/lpar.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3c3da25b445c..e4ed5317f117 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -636,8 +636,16 @@ static const struct proc_ops vcpudispatch_stats_freq_proc_ops = { static int __init vcpudispatch_stats_procfs_init(void) { - if (!lppaca_shared_proc(get_lppaca())) + /* + * Avoid smp_processor_id while preemptible. All CPUs should have + * the same value for lppaca_shared_proc. 
+ */ + preempt_disable(); + if (!lppaca_shared_proc(get_lppaca())) { + preempt_enable(); return 0; + } + preempt_enable(); if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL, &vcpudispatch_stats_proc_ops)) -- cgit v1.2.3-59-g8ed1b From af6cf95c4d003fccd6c2ecc99a598fb854b537e7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 23 Mar 2020 15:27:29 -0700 Subject: powerpc/maple: Fix declaration made after definition When building ppc64 defconfig, Clang errors (trimmed for brevity): arch/powerpc/platforms/maple/setup.c:365:1: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] machine_device_initcall(maple, maple_cpc925_edac_setup); ^ machine_device_initcall expands to __define_machine_initcall, which in turn has the macro machine_is used in it, which declares mach_##name with an __attribute__((weak)). define_machine actually defines mach_##name, which in this file happens before the declaration, hence the warning. To fix this, move define_machine after machine_device_initcall so that the declaration occurs before the definition, which matches how machine_device_initcall and define_machine work throughout arch/powerpc. While we're here, remove some spaces before tabs. 
Fixes: 8f101a051ef0 ("edac: cpc925 MC platform device setup") Reported-by: Nick Desaulniers Suggested-by: Ilie Halip Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200323222729.15365-1-natechancellor@gmail.com --- arch/powerpc/platforms/maple/setup.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 6f019df37916..15b2c6eb506d 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -291,23 +291,6 @@ static int __init maple_probe(void) return 1; } -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = maple_progress, - .power_save = power4_idle, -}; - #ifdef CONFIG_EDAC /* * Register a platform device for CPC925 memory controller on @@ -364,3 +347,20 @@ static int __init maple_cpc925_edac_setup(void) } machine_device_initcall(maple, maple_cpc925_edac_setup); #endif + +define_machine(maple) { + .name = "Maple", + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .init_IRQ = maple_init_IRQ, + .pci_irq_fixup = maple_pci_irq_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .halt = maple_halt, + .get_boot_time = maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = maple_progress, + .power_save = power4_idle, +}; -- cgit v1.2.3-59-g8ed1b From 21f8b2fa3ca5b01f7a2b51b89ce97a3705a15aa0 Mon Sep 17 00:00:00 
2001 From: Christophe Leroy Date: Tue, 18 Feb 2020 19:38:27 +0000 Subject: powerpc/kprobes: Ignore traps that happened in real mode When a program check exception happens while MMU translation is disabled, following Oops happens in kprobe_handler() in the following code: } else if (*addr != BREAKPOINT_INSTRUCTION) { BUG: Unable to handle kernel data access on read at 0x0000e268 Faulting instruction address: 0xc000ec34 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=16K PREEMPT CMPC885 Modules linked in: CPU: 0 PID: 429 Comm: cat Not tainted 5.6.0-rc1-s3k-dev-00824-g84195dc6c58a #3267 NIP: c000ec34 LR: c000ecd8 CTR: c019cab8 REGS: ca4d3b58 TRAP: 0300 Not tainted (5.6.0-rc1-s3k-dev-00824-g84195dc6c58a) MSR: 00001032 CR: 2a4d3c52 XER: 00000000 DAR: 0000e268 DSISR: c0000000 GPR00: c000b09c ca4d3c10 c66d0620 00000000 ca4d3c60 00000000 00009032 00000000 GPR08: 00020000 00000000 c087de44 c000afe0 c66d0ad0 100d3dd6 fffffff3 00000000 GPR16: 00000000 00000041 00000000 ca4d3d70 00000000 00000000 0000416d 00000000 GPR24: 00000004 c53b6128 00000000 0000e268 00000000 c07c0000 c07bb6fc ca4d3c60 NIP [c000ec34] kprobe_handler+0x128/0x290 LR [c000ecd8] kprobe_handler+0x1cc/0x290 Call Trace: [ca4d3c30] [c000b09c] program_check_exception+0xbc/0x6fc [ca4d3c50] [c000e43c] ret_from_except_full+0x0/0x4 --- interrupt: 700 at 0xe268 Instruction dump: 913e0008 81220000 38600001 3929ffff 91220000 80010024 bb410008 7c0803a6 38210020 4e800020 38600000 4e800020 <813b0000> 6d2a7fe0 2f8a0008 419e0154 ---[ end trace 5b9152d4cdadd06d ]--- kprobe is not prepared to handle events in real mode and functions running in real mode should have been blacklisted, so kprobe_handler() can safely bail out telling 'this trap is not mine' for any trap that happened while in real-mode. If the trap happened with MSR_IR or MSR_DR cleared, return 0 immediately. 
Reported-by: Larry Finger Fixes: 6cc89bad60a6 ("powerpc/kprobes: Invoke handlers directly") Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Christophe Leroy Reviewed-by: Masami Hiramatsu Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/424331e2006e7291a1bfe40e7f3fa58825f565e1.1582054578.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 3aaff3365134..81efb605113e 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -264,6 +264,9 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; + if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing -- cgit v1.2.3-59-g8ed1b From 7074695ac6fb965d478f373b95bc5c636e9f21b0 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 24 Mar 2020 15:29:12 -0300 Subject: powerpc/prom_init: Remove leftover comment The if statement that this comment referred to was removed in commit 11fdb309341c ("powerpc/prom_init: Remove support for OPAL v2"). 
Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200324182912.1048906-1-farosas@linux.ibm.com --- arch/powerpc/kernel/prom_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 673f13b87db1..806be751c336 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -3477,7 +3477,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ hdr = dt_header_start; - /* Don't print anything after quiesce under OPAL, it crashes OFW */ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); prom_debug("->dt_header_start=0x%lx\n", hdr); -- cgit v1.2.3-59-g8ed1b From a7032637b54186e5649917679727d7feaec932b1 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 12 Aug 2019 14:50:43 -0700 Subject: powerpc: Prefer __section and __printf from compiler_attributes.h Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Signed-off-by: Nick Desaulniers [mpe: Drop changes to a/p/boot which doesn't use linux headers] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190812215052.71840-10-ndesaulniers@google.com --- arch/powerpc/include/asm/cache.h | 2 +- arch/powerpc/kernel/btext.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 72b81015cebe..609cab1d58f2 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -97,7 +97,7 @@ static inline u32 l1_icache_bytes(void) #endif -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) #ifdef CONFIG_PPC_BOOK3S_32 extern long _get_L2CR(void); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 6dfceaa820e4..f57712a55815 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -26,7 +26,7 @@ static void 
scrollscreen(void); #endif -#define __force_data __attribute__((__section__(".data"))) +#define __force_data __section(.data) static int g_loc_X __force_data; static int g_loc_Y __force_data; -- cgit v1.2.3-59-g8ed1b From b1a504a6500df50e83b701b7946b34fce27ad8a3 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Fri, 6 Mar 2020 16:01:40 +0100 Subject: powerpc/xive: Use XIVE_BAD_IRQ instead of zero to catch non configured IPIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CPU is brought up, an IPI number is allocated and recorded under the XIVE CPU structure. Invalid IPI numbers are tracked with interrupt number 0x0. On the PowerNV platform, the interrupt number space starts at 0x10 and this works fine. However, on the sPAPR platform, it is possible to allocate the interrupt number 0x0 and this raises an issue when CPU 0 is unplugged. The XIVE spapr driver tracks allocated interrupt numbers in a bitmask and it is not correctly updated when interrupt number 0x0 is freed. It stays allocated and it is then impossible to reallocate. Fix by using the XIVE_BAD_IRQ value instead of zero on both platforms. 
Reported-by: David Gibson Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Tested-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306150143.5551-2-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 12 +++--------- arch/powerpc/sysdev/xive/native.c | 4 ++-- arch/powerpc/sysdev/xive/spapr.c | 4 ++-- arch/powerpc/sysdev/xive/xive-internal.h | 7 +++++++ 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 9651ca061828..0e918fe6a4ec 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -68,13 +68,6 @@ static u32 xive_ipi_irq; /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); -/* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this - */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) - /* An invalid CPU target */ #define XIVE_INVALID_TARGET (-1) @@ -1150,7 +1143,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; /* Grab an IPI from the backend, this will populate xc->hw_ipi */ @@ -1187,7 +1180,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? 
*/ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; /* Mask the IPI */ @@ -1343,6 +1336,7 @@ static int xive_prepare_cpu(unsigned int cpu) if (np) xc->chip_id = of_get_ibm_chip_id(np); of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; per_cpu(xive_cpu, cpu) = xc; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 0ff6b739052c..50e1a8e02497 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -312,7 +312,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -320,7 +320,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(OPAL_BUSY_DELAY_MS); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 55dc61cb4867..3f15615712b5 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -560,11 +560,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 59cd366e7933..382980f4de2d 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,13 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { #ifdef 
CONFIG_SMP -- cgit v1.2.3-59-g8ed1b From 97ef275077932c65b1b8ec5022abd737a9fbf3e0 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Fri, 6 Mar 2020 16:01:41 +0100 Subject: powerpc/xive: Fix xmon support on the PowerNV platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PowerNV platform has multiple IRQ chips and the xmon command dumping the state of the XIVE interrupt should only operate on the XIVE IRQ chip. Fixes: 5896163f7f91 ("powerpc/xmon: Improve output of XIVE interrupts") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306150143.5551-3-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 0e918fe6a4ec..fe8d396e2301 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -258,11 +258,15 @@ notrace void xmon_xive_do_dump(int cpu) int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { + struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; u32 target; u8 prio; u32 lirq; + if (!is_xive_irq(chip)) + return -EINVAL; + rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); -- cgit v1.2.3-59-g8ed1b From 5191e0ba51df3a886a2040002e09afd53f625232 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Fri, 6 Mar 2020 16:01:42 +0100 Subject: powerpc/xmon: Add source flags to output of XIVE interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some firmwares or hypervisors can advertise different source characteristics. Track their value under XMON. What we are mostly interested in is the StoreEOI flag. 
Signed-off-by: Cédric Le Goater Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306150143.5551-4-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index fe8d396e2301..0a47ddf3c148 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -280,7 +280,10 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); u64 val = xive_esb_read(xd, XIVE_ESB_GET); - xmon_printf("PQ=%c%c", + xmon_printf("flags=%c%c%c PQ=%c%c", + xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', val & XIVE_ESB_VAL_P ? 'P' : '-', val & XIVE_ESB_VAL_Q ? 'Q' : '-'); } -- cgit v1.2.3-59-g8ed1b From 930914b7d528fc6b0249bffc00564100bcf6ef75 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Fri, 6 Mar 2020 16:01:43 +0100 Subject: powerpc/xive: Add a debugfs file to dump internal XIVE state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As does XMON, the debugfs file /sys/kernel/debug/powerpc/xive exposes the XIVE internal state of the machine CPUs and interrupts. Available on the PowerNV and sPAPR platforms. 
Signed-off-by: Cédric Le Goater [mpe: Make the debugfs file 0400] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200306150143.5551-5-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 105 +++++++++++++++++++++++++++++++ arch/powerpc/sysdev/xive/native.c | 3 + arch/powerpc/sysdev/xive/spapr.c | 19 ++++++ arch/powerpc/sysdev/xive/xive-internal.h | 2 + 4 files changed, 129 insertions(+) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 0a47ddf3c148..b294f70f1a67 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -1555,3 +1556,107 @@ static int __init xive_off(char *arg) return 0; } __setup("xive=off", xive_off); + +void xive_debug_show_cpu(struct seq_file *m, int cpu) +{ + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + seq_printf(m, "CPU %d:", cpu); + if (xc) { + seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); + +#ifdef CONFIG_SMP + { + u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); + + seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi, + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 
'Q' : '-'); + } +#endif + { + struct xive_q *q = &xc->queue[xive_irq_priority]; + u32 i0, i1, idx; + + if (q->qpage) { + idx = q->idx; + i0 = be32_to_cpup(q->qpage + idx); + idx = (idx + 1) & q->msk; + i1 = be32_to_cpup(q->qpage + idx); + seq_printf(m, "EQ idx=%d T=%d %08x %08x ...", + q->idx, q->toggle, i0, i1); + } + } + } + seq_puts(m, "\n"); +} + +void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) +{ + struct irq_chip *chip = irq_data_get_irq_chip(d); + int rc; + u32 target; + u8 prio; + u32 lirq; + + if (!is_xive_irq(chip)) + return; + + rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); + if (rc) { + seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); + return; + } + + seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", + hw_irq, target, prio, lirq); + + if (d) { + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + u64 val = xive_esb_read(xd, XIVE_ESB_GET); + + seq_printf(m, "flags=%c%c%c PQ=%c%c", + xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 
'Q' : '-'); + } + seq_puts(m, "\n"); +} + +static int xive_core_debug_show(struct seq_file *m, void *private) +{ + unsigned int i; + struct irq_desc *desc; + int cpu; + + if (xive_ops->debug_show) + xive_ops->debug_show(m, private); + + for_each_possible_cpu(cpu) + xive_debug_show_cpu(m, cpu); + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + unsigned int hw_irq; + + if (!d) + continue; + + hw_irq = (unsigned int)irqd_to_hwirq(d); + + /* IPIs are special (HW number 0) */ + if (hw_irq) + xive_debug_show_irq(m, hw_irq, d); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_core_debug); + +int xive_core_debug_init(void) +{ + debugfs_create_file("xive", 0400, powerpc_debugfs_root, + NULL, &xive_core_debug_fops); + return 0; +} diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 50e1a8e02497..5218fdc4b29a 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -850,3 +851,5 @@ int xive_native_get_vp_state(u32 vp_id, u64 *out_state) return 0; } EXPORT_SYMBOL_GPL(xive_native_get_vp_state); + +machine_arch_initcall(powernv, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 3f15615712b5..7ab5c6780997 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -645,6 +646,21 @@ static void xive_spapr_sync_source(u32 hw_irq) plpar_int_sync(0, hw_irq); } +static int xive_spapr_debug_show(struct seq_file *m, void *private) +{ + struct xive_irq_bitmap *xibm; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { + memset(buf, 0, PAGE_SIZE); + bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); + seq_printf(m, "bitmap #%d: %s", xibm->count, buf); + } + kfree(buf); + + return 0; +} + static 
const struct xive_ops xive_spapr_ops = { .populate_irq_data = xive_spapr_populate_irq_data, .configure_irq = xive_spapr_configure_irq, @@ -662,6 +678,7 @@ static const struct xive_ops xive_spapr_ops = { #ifdef CONFIG_SMP .get_ipi = xive_spapr_get_ipi, .put_ipi = xive_spapr_put_ipi, + .debug_show = xive_spapr_debug_show, #endif /* CONFIG_SMP */ .name = "spapr", }; @@ -839,3 +856,5 @@ bool __init xive_spapr_init(void) pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; } + +machine_arch_initcall(pseries, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 382980f4de2d..b7b901da2168 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -57,12 +57,14 @@ struct xive_ops { int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc); void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); #endif + int (*debug_show)(struct seq_file *m, void *private); const char *name; }; bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, u8 max_prio); __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift); +int xive_core_debug_init(void); static inline u32 xive_alloc_order(u32 queue_shift) { -- cgit v1.2.3-59-g8ed1b From 8ec26c25c33d21468a8b39722337463550b15e5b Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Mon, 27 Feb 2017 08:28:14 -0600 Subject: powerpc/xmon: Add ASCII dump to d1,d2,d4,d8 commands. The reason debuggers add an ASCII dump to other types of memory dumps is to give the user visual reference points in the case that ASCII strings are adjacent to other structures or elements. For example, when examining the task_struct structure one can look for the comm[] string and use it to locate other important elements. ASCII strings do not have endianness; they exist in memory in the same order regardless of CPU endianness.
ASCII strings are, by definition, human readable and so should be presented in a human readable format. For these reasons, the supplemental ASCII dump does not re-order the strings from memory to match the endianness of the corresponding 16, 32, or 64 bit words. That would make the ASCII dump much less useful. Signed-off-by: Douglas Miller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1488205694-13337-1-git-send-email-dougmill@linux.vnet.ibm.com --- arch/powerpc/xmon/xmon.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index ea303b7e4e29..7af840c0fc93 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2713,7 +2713,12 @@ static void dump_by_size(unsigned long addr, long count, int size) printf("%0*llx", size * 2, val); } - printf("\n"); + printf(" |"); + for (j = 0; j < 16; ++j) { + val = temp[j]; + putchar(' ' <= val && val <= '~' ? val : '.'); + } + printf("|\n"); } } -- cgit v1.2.3-59-g8ed1b From 7703889e8ee1b318f632be7ba4d58d9962ecf34f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:39 +1100 Subject: powerpc: Update MAINTAINERS A while back Paul pointed out I'd been maintaining the tree more or less solo for over five years, so perhaps it's time to update the MAINTAINERS entry. Ben & Paul still wrote most of the code, so keep them as Reviewers so they still get Cc'ed on things. But if you're wondering why your patch hasn't been merged that's my fault.
Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200224233146.23734-1-mpe@ellerman.id.au --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a0d86490c2c6..1e36f88384f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9600,9 +9600,9 @@ F: arch/powerpc/platforms/powermac/ F: drivers/macintosh/ LINUX FOR POWERPC (32-BIT AND 64-BIT) -M: Benjamin Herrenschmidt -M: Paul Mackerras M: Michael Ellerman +R: Benjamin Herrenschmidt +R: Paul Mackerras W: https://github.com/linuxppc/linux/wiki L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ -- cgit v1.2.3-59-g8ed1b From fd65a1aaad488d8346f235816c68203b95bb15f1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:40 +1100 Subject: powerpc: Update wiki link in MAINTAINERS The wiki has moved, update the link. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200224233146.23734-2-mpe@ellerman.id.au --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1e36f88384f9..0081f492e550 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9603,7 +9603,7 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Michael Ellerman R: Benjamin Herrenschmidt R: Paul Mackerras -W: https://github.com/linuxppc/linux/wiki +W: https://github.com/linuxppc/wiki/wiki L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git -- cgit v1.2.3-59-g8ed1b From b24f9ee4526f65bb5acdd68a07f4099486208a9e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:41 +1100 Subject: powerpc: Remove PA SEMI MAINTAINERS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PA SEMI entries have been orphaned for 3 ½ years, so fold them into the main POWERPC entry. 
The result of get_maintainer.pl is more or less unchanged. Signed-off-by: Michael Ellerman Acked-by: Olof Johansson Link: https://lore.kernel.org/r/20200224233146.23734-3-mpe@ellerman.id.au --- MAINTAINERS | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0081f492e550..1806496279c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9620,6 +9620,8 @@ F: drivers/crypto/vmx/ F: drivers/i2c/busses/i2c-opal.c F: drivers/net/ethernet/ibm/ibmveth.* F: drivers/net/ethernet/ibm/ibmvnic.* +F: drivers/*/*/*pasemi* +F: drivers/*/*pasemi* F: drivers/pci/hotplug/pnv_php.c F: drivers/pci/hotplug/rpa* F: drivers/rtc/rtc-opal.c @@ -9674,13 +9676,6 @@ S: Orphan F: arch/powerpc/*/*virtex* F: arch/powerpc/*/*/*virtex* -LINUX FOR POWERPC PA SEMI PWRFICIENT -L: linuxppc-dev@lists.ozlabs.org -S: Orphan -F: arch/powerpc/platforms/pasemi/ -F: drivers/*/*pasemi* -F: drivers/*/*/*pasemi* - LINUX KERNEL DUMP TEST MODULE (LKDTM) M: Kees Cook S: Maintained @@ -12540,16 +12535,6 @@ W: http://wireless.kernel.org/en/users/Drivers/p54 S: Maintained F: drivers/net/wireless/intersil/p54/ -PA SEMI ETHERNET DRIVER -L: netdev@vger.kernel.org -S: Orphan -F: drivers/net/ethernet/pasemi/* - -PA SEMI SMBUS DRIVER -L: linux-i2c@vger.kernel.org -S: Orphan -F: drivers/i2c/busses/i2c-pasemi.c - PACKING M: Vladimir Oltean L: netdev@vger.kernel.org -- cgit v1.2.3-59-g8ed1b From 1ca99770e5aca6036e88a37610a17d5d65ea9752 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:42 +1100 Subject: powerpc: Mark 4xx as Orphan in MAINTAINERS The 4xx platforms are no longer maintained. 
Signed-off-by: Michael Ellerman Acked-by: Alistair Popple Link: https://lore.kernel.org/r/20200224233146.23734-4-mpe@ellerman.id.au --- MAINTAINERS | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1806496279c3..e722ecbe415a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9644,11 +9644,8 @@ F: arch/powerpc/platforms/512x/ F: arch/powerpc/platforms/52xx/ LINUX FOR POWERPC EMBEDDED PPC4XX -M: Alistair Popple -M: Matt Porter -W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org -S: Maintained +S: Orphan F: arch/powerpc/platforms/40x/ F: arch/powerpc/platforms/44x/ -- cgit v1.2.3-59-g8ed1b From ec2debfedeca017dfa741d742ec790a80a73ab62 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:43 +1100 Subject: powerpc: Drop XILINX MAINTAINERS entry This has been orphaned for ~7 years, remove it. Signed-off-by: Michael Ellerman Acked-by: Grant Likely Link: https://lore.kernel.org/r/20200224233146.23734-5-mpe@ellerman.id.au --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e722ecbe415a..1e9f18cd2db9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9667,12 +9667,6 @@ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/8xx/ -LINUX FOR POWERPC EMBEDDED XILINX VIRTEX -L: linuxppc-dev@lists.ozlabs.org -S: Orphan -F: arch/powerpc/*/*virtex* -F: arch/powerpc/*/*/*virtex* - LINUX KERNEL DUMP TEST MODULE (LKDTM) M: Kees Cook S: Maintained -- cgit v1.2.3-59-g8ed1b From b865a8b560105d08f55f562cbc89719f6d3e816b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:44 +1100 Subject: powerpc: Update MPC5XXX MAINTAINERS entry It's several years since the last commit from Anatolij, so mark MPC5XXX as "Odd Fixes" rather than "Maintained". Also the git link no longer works so remove it. 
Signed-off-by: Michael Ellerman Acked-by: Anatolij Gustschin Link: https://lore.kernel.org/r/20200224233146.23734-6-mpe@ellerman.id.au --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1e9f18cd2db9..a8c8791800fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9638,8 +9638,7 @@ N: pseries LINUX FOR POWERPC EMBEDDED MPC5XXX M: Anatolij Gustschin L: linuxppc-dev@lists.ozlabs.org -T: git git://git.denx.de/linux-denx-agust.git -S: Maintained +S: Odd Fixes F: arch/powerpc/platforms/512x/ F: arch/powerpc/platforms/52xx/ -- cgit v1.2.3-59-g8ed1b From 790082a2ac154ba66e1f202f471af7ab42e302f9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:45 +1100 Subject: powerpc: Update powermac MAINTAINERS entry Ben is no longer actively maintaining the powermac code, but we know where to find him if something really needs attention. The www.penguinppc.org link is dead so remove it. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200224233146.23734-7-mpe@ellerman.id.au --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a8c8791800fd..0532ab217331 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9593,9 +9593,8 @@ F: include/uapi/linux/lightnvm.h LINUX FOR POWER MACINTOSH M: Benjamin Herrenschmidt -W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org -S: Maintained +S: Odd Fixes F: arch/powerpc/platforms/powermac/ F: drivers/macintosh/ -- cgit v1.2.3-59-g8ed1b From f46b7a8ea0b391658796cc0adfccd1e990b4a223 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 10:31:46 +1100 Subject: powerpc: Update 83xx/85xx MAINTAINERS entry Scott said he was still maintaining this "sort of", so change the status to Odd Fixes. Kumar has long ago moved on to greener pastures. Remove the dead penguinppc.org link. 
Signed-off-by: Michael Ellerman Acked-by: Scott Wood Link: https://lore.kernel.org/r/20200224233146.23734-8-mpe@ellerman.id.au --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0532ab217331..167f99bc6a3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9649,11 +9649,9 @@ F: arch/powerpc/platforms/44x/ LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX M: Scott Wood -M: Kumar Gala -W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git -S: Maintained +S: Odd fixes F: arch/powerpc/platforms/83xx/ F: arch/powerpc/platforms/85xx/ F: Documentation/devicetree/bindings/powerpc/fsl/ -- cgit v1.2.3-59-g8ed1b From 65de8bb2b7d0d4f5ac3a54aa1f16d36320a4b6ab Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 25 Feb 2020 20:25:34 +1100 Subject: powerpc: Switch 8xx MAINTAINERS entry to Christophe It's over 10 years since the last commit from Vitaly, so I suspect he's moved on to other things. Christophe has been the primary contributor to 8xx in the last several years, so anoint him as the maintainer. Remove the dead penguinppc.org link.
Signed-off-by: Michael Ellerman Acked-by: Christophe Leroy Link: https://lore.kernel.org/r/20200225092534.9587-1-mpe@ellerman.id.au --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 167f99bc6a3b..dad68253cc4a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9657,8 +9657,7 @@ F: arch/powerpc/platforms/85xx/ F: Documentation/devicetree/bindings/powerpc/fsl/ LINUX FOR POWERPC EMBEDDED PPC8XX -M: Vitaly Bordug -W: http://www.penguinppc.org/ +M: Christophe Leroy L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/8xx/ -- cgit v1.2.3-59-g8ed1b From 4b4d181d63518334070a877ba789211bde77da9e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 13 Mar 2020 22:20:19 +1100 Subject: powerpc/smp: Drop superfluous NULL check We don't need the NULL check of np, the result is the same because the OF helpers cope with NULL, of_node_to_nid(NULL) == NUMA_NO_NODE (-1). Signed-off-by: Michael Ellerman Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200313112020.28235-1-mpe@ellerman.id.au --- arch/powerpc/kernel/smp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 37c12e3bab9e..aae61a3b3201 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1197,11 +1197,8 @@ int get_physical_package_id(int cpu) */ if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR)) { struct device_node *np = of_get_cpu_node(cpu, NULL); - - if (np) { - pkg_id = of_node_to_nid(np); - of_node_put(np); - } + pkg_id = of_node_to_nid(np); + of_node_put(np); } #endif /* CONFIG_PPC_SPLPAR */ -- cgit v1.2.3-59-g8ed1b From c72e8da06250390bb7759399a32fa0ab6f84e6d1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 13 Mar 2020 22:20:20 +1100 Subject: powerpc/smp: Use IS_ENABLED() to avoid #ifdef We can avoid the #ifdef by using IS_ENABLED() in the existing condition 
check. Signed-off-by: Michael Ellerman Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200313112020.28235-2-mpe@ellerman.id.au --- arch/powerpc/kernel/smp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index aae61a3b3201..6d2a3a3666f0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1189,18 +1189,17 @@ int get_physical_package_id(int cpu) { int pkg_id = cpu_to_chip_id(cpu); -#ifdef CONFIG_PPC_SPLPAR /* * If the platform is PowerNV or Guest on KVM, ibm,chip-id is * defined. Hence we would return the chip-id as the result of * get_physical_package_id. */ - if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR)) { + if (pkg_id == -1 && firmware_has_feature(FW_FEATURE_LPAR) && + IS_ENABLED(CONFIG_PPC_SPLPAR)) { struct device_node *np = of_get_cpu_node(cpu, NULL); pkg_id = of_node_to_nid(np); of_node_put(np); } -#endif /* CONFIG_PPC_SPLPAR */ return pkg_id; } -- cgit v1.2.3-59-g8ed1b From efbc4303b255bb80ab1283794b36dd5fe1fb0ec3 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 27 Mar 2020 00:19:16 +0530 Subject: powerpc/pseries: Handle UE event for memcpy_mcsafe memcpy_mcsafe has been implemented for power machines which is used by pmem infrastructure, so that a UE encountered during memcpy from pmem devices would not result in a panic; instead, an appropriate error code is returned. The implementation expects machine check handler to ignore the event and set nip to continue the execution from fixup code. Appropriate changes are already made to powernv machine check handler; make similar changes to pseries machine check handler to ignore the event and set nip to continue execution at the fixup entry if we hit UE at an instruction with a fixup entry.
while we are at it, have a common function which searches the exception table entry and updates nip with fixup address, and any future common changes can be made in this function that are valid for both architectures. powernv changes are made by commit 895e3dceeb97 ("powerpc/mce: Handle UE event for memcpy_mcsafe") Reviewed-by: Mahesh Salgaonkar Reviewed-by: Santosh S Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200326184916.31172-1-ganeshgr@linux.ibm.com --- arch/powerpc/include/asm/mce.h | 2 ++ arch/powerpc/kernel/mce.c | 14 ++++++++++++++ arch/powerpc/kernel/mce_power.c | 8 ++------ arch/powerpc/platforms/pseries/ras.c | 3 +++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 6a6ddaabdb34..376a395daf32 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -218,6 +218,8 @@ extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); +extern void mce_common_process_ue(struct pt_regs *regs, + struct mce_error_info *mce_err); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 34c1001e9e8b..8077b5fb18a7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -251,6 +252,19 @@ void machine_check_queue_event(void) /* Queue irq work to process this event later. 
*/ irq_work_queue(&mce_event_process_work); } + +void mce_common_process_ue(struct pt_regs *regs, + struct mce_error_info *mce_err) +{ + const struct exception_table_entry *entry; + + entry = search_kernel_exception_table(regs->nip); + if (entry) { + mce_err->ignore_event = true; + regs->nip = extable_fixup(entry); + } +} + /* * process pending MCE event from the mce event queue. This function will be * called during syscall exit. diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 1cbf7f1a4e3d..067b094bfeff 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -579,14 +579,10 @@ static long mce_handle_ue_error(struct pt_regs *regs, struct mce_error_info *mce_err) { long handled = 0; - const struct exception_table_entry *entry; - entry = search_kernel_exception_table(regs->nip); - if (entry) { - mce_err->ignore_event = true; - regs->nip = extable_fixup(entry); + mce_common_process_ue(regs, mce_err); + if (mce_err->ignore_event) return 1; - } /* * On specific SCOM read via MMIO we may get a machine check diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 1d7f973c647b..aa6208c8d4f0 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -558,6 +558,9 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) switch (mce_log->error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; + mce_common_process_ue(regs, &mce_err); + if (mce_err.ignore_event) + disposition = RTAS_DISP_FULLY_RECOVERED; switch (err_sub_type) { case MC_ERROR_UE_IFETCH: mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; -- cgit v1.2.3-59-g8ed1b From 968339fad422a58312f67718691b717dac45c399 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 25 Mar 2020 09:42:57 -0700 Subject: powerpc/boot: Delete unneeded .globl _zimage_start .globl sets the symbol binding to STB_GLOBAL while .weak sets the binding to STB_WEAK. 
GNU as let .weak override .globl since binutils-gdb 5ca547dc2399a0a5d9f20626d4bf5547c3ccfddd (1996). Clang integrated assembler let the last win but it may error in the future. Since it is a convention that only one binding directive is used, just delete .globl. Fixes: ee9d21b3b358 ("powerpc/boot: Ensure _zimage_start is a weak symbol") Signed-off-by: Fangrui Song Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200325164257.170229-1-maskray@google.com --- arch/powerpc/boot/crt0.S | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 92608f34d312..1d83966f5ef6 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -44,9 +44,6 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .globl _zimage_start - /* Clang appears to require the .weak directive to be after the symbol - * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ .weak _zimage_start _zimage_start: .globl _zimage_start_lib -- cgit v1.2.3-59-g8ed1b From 233ba5461838a56c19600216f0919e7cd3aec40e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 20 Mar 2020 16:02:42 +0530 Subject: powerpc/64: Avoid isync in flush_dcache_range() As per ISA an isync is only needed on instruction cache block invalidate. Remove the same from dcache invalidate. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200320103242.229223-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/cacheflush.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 4a1c9f0200e1..e92191b390f3 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -65,17 +65,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop) unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; - if (IS_ENABLED(CONFIG_PPC64)) { + if (IS_ENABLED(CONFIG_PPC64)) mb(); /* sync */ - isync(); - } for (i = 0; i < size >> shift; i++, addr += bytes) dcbf(addr); mb(); /* sync */ - if (IS_ENABLED(CONFIG_PPC64)) - isync(); } /* -- cgit v1.2.3-59-g8ed1b From c7def7fbdeaa25feaa19caf4a27c5d10bd8789e4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 Mar 2020 22:47:19 +1100 Subject: powerpc/64/tm: Don't let userspace set regs->trap via sigreturn In restore_tm_sigcontexts() we take the trap value directly from the user sigcontext with no checking: err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); This means we can be in the kernel with an arbitrary regs->trap value. Although that's not immediately problematic, there is a risk we could trigger one of the uses of CHECK_FULL_REGS(): #define CHECK_FULL_REGS(regs) BUG_ON(regs->trap & 1) It can also cause us to unnecessarily save non-volatile GPRs again in save_nvgprs(), which shouldn't be problematic but is still wrong. It's also possible it could trick the syscall restart machinery, which relies on regs->trap not being == 0xc00 (see 9a81c16b5275 ("powerpc: fix double syscall restarts")), though I haven't been able to make that happen. Finally it doesn't match the behaviour of the non-TM case, in restore_sigcontext() which zeroes regs->trap. 
So change restore_tm_sigcontexts() to zero regs->trap. This was discovered while testing Nick's upcoming rewrite of the syscall entry path. In that series the call to save_nvgprs() prior to signal handling (do_notify_resume()) is removed, which leaves the low-bit of regs->trap uncleared which can then trigger the FULL_REGS() WARNs in setup_tm_sigcontexts(). Fixes: 2b0a576d15e0 ("powerpc: Add new transactional memory state to the signal context") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200401023836.3286664-1-mpe@ellerman.id.au --- arch/powerpc/kernel/signal_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 84ed2e77ef9c..adfde59cf4ba 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -473,8 +473,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); + /* Don't allow userspace to set the trap value */ + regs->trap = 0; + /* These regs are not checkpointed; they can go in 'regs'. 
*/ - err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); -- cgit v1.2.3-59-g8ed1b From a2e366832f3f4d5e1b47b7c7f7c41977bd5100f4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 Mar 2020 20:41:44 +1000 Subject: powerpc/64: mark emergency stacks valid to unwind Before: WARNING: CPU: 0 PID: 494 at arch/powerpc/kernel/irq.c:343 CPU: 0 PID: 494 Comm: a Tainted: G W NIP: c00000000001ed2c LR: c000000000d13190 CTR: c00000000003f910 REGS: c0000001fffd3870 TRAP: 0700 Tainted: G W MSR: 8000000000021003 CR: 28000488 XER: 00000000 CFAR: c00000000001ec90 IRQMASK: 0 GPR00: c000000000aeb12c c0000001fffd3b00 c0000000012ba300 0000000000000000 GPR04: 0000000000000000 0000000000000000 000000010bd207c8 6b00696e74657272 GPR08: 0000000000000000 0000000000000000 0000000000000000 efbeadde00000000 GPR12: 0000000000000000 c0000000014a0000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 0000000000000000 000000010bd207bc GPR28: 0000000000000000 c00000000148a898 0000000000000000 c0000001ffff3f50 NIP [c00000000001ed2c] arch_local_irq_restore.part.0+0xac/0x100 LR [c000000000d13190] _raw_spin_unlock_irqrestore+0x50/0xc0 Call Trace: Instruction dump: 60000000 7d2000a6 71298000 41820068 39200002 7d210164 4bffff9c 60000000 60000000 7d2000a6 71298000 4c820020 <0fe00000> 4e800020 60000000 60000000 After: WARNING: CPU: 0 PID: 499 at arch/powerpc/kernel/irq.c:343 CPU: 0 PID: 499 Comm: a Not tainted NIP: c00000000001ed2c LR: c000000000d13210 CTR: c00000000003f980 REGS: c0000001fffd3870 TRAP: 0700 Not tainted MSR: 8000000000021003 CR: 28000488 XER: 00000000 CFAR: c00000000001ec90 IRQMASK: 0 GPR00: c000000000aeb1ac c0000001fffd3b00 c0000000012ba300 
0000000000000000 GPR04: 0000000000000000 0000000000000000 00000001347607c8 6b00696e74657272 GPR08: 0000000000000000 0000000000000000 0000000000000000 efbeadde00000000 GPR12: 0000000000000000 c0000000014a0000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 0000000000000000 00000001347607bc GPR28: 0000000000000000 c00000000148a898 0000000000000000 c0000001ffff3f50 NIP [c00000000001ed2c] arch_local_irq_restore.part.0+0xac/0x100 LR [c000000000d13210] _raw_spin_unlock_irqrestore+0x50/0xc0 Call Trace: [c0000001fffd3b20] [c000000000aeb1ac] of_find_property+0x6c/0x90 [c0000001fffd3b70] [c000000000aeb1f0] of_get_property+0x20/0x40 [c0000001fffd3b90] [c000000000042cdc] rtas_token+0x3c/0x70 [c0000001fffd3bb0] [c0000000000dc318] fwnmi_release_errinfo+0x28/0x70 [c0000001fffd3c10] [c0000000000dcd8c] pseries_machine_check_realmode+0x1dc/0x540 [c0000001fffd3cd0] [c00000000003fe04] machine_check_early+0x54/0x70 [c0000001fffd3d00] [c000000000008384] machine_check_early_common+0x134/0x1f0 --- interrupt: 200 at 0x1347607c8 LR = 0x7fffafbd8328 Instruction dump: 60000000 7d2000a6 71298000 41820068 39200002 7d210164 4bffff9c 60000000 60000000 7d2000a6 71298000 4c820020 <0fe00000> 4e800020 60000000 60000000 Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200325104144.158362-1-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 110db94cdf3c..009833f928bf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1974,6 +1974,32 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, return 0; } +static inline int valid_emergency_stack(unsigned long sp, struct 
task_struct *p, + unsigned long nbytes) +{ +#ifdef CONFIG_PPC64 + unsigned long stack_page; + unsigned long cpu = task_cpu(p); + + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + +# ifdef CONFIG_PPC_BOOK3S_64 + stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; +# endif +#endif + + return 0; +} + + int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes) { @@ -1985,7 +2011,10 @@ int validate_sp(unsigned long sp, struct task_struct *p, if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; - return valid_irq_stack(sp, p, nbytes); + if (valid_irq_stack(sp, p, nbytes)) + return 1; + + return valid_emergency_stack(sp, p, nbytes); } EXPORT_SYMBOL(validate_sp); -- cgit v1.2.3-59-g8ed1b From a42a239db3262b8185cb1a07a9350392ef1439ca Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:10 +1000 Subject: powerpc/64s/exception: Introduce INT_DEFINE parameter block for code generation The code generation macro arguments are difficult to read, and defaults can't easily be used. This introduces a block where parameters can be set for interrupt handler code generation by the subsequent macros, and adds the first generation macro for interrupt entry. One interrupt handler is converted to the new macros to demonstrate the change, the rest will be converted all at once. No generated code change.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-2-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 77 ++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ffc15f4f079d..1b942c98bc05 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -193,6 +193,61 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtctr reg; \ bctr +/* + * Interrupt code generation macros + */ +#define IVEC .L_IVEC_\name\() +#define IHSRR .L_IHSRR_\name\() +#define IAREA .L_IAREA_\name\() +#define IDAR .L_IDAR_\name\() +#define IDSISR .L_IDSISR_\name\() +#define ISET_RI .L_ISET_RI_\name\() +#define IEARLY .L_IEARLY_\name\() +#define IMASK .L_IMASK_\name\() +#define IKVM_REAL .L_IKVM_REAL_\name\() +#define IKVM_VIRT .L_IKVM_VIRT_\name\() + +#define INT_DEFINE_BEGIN(n) \ +.macro int_define_ ## n name + +#define INT_DEFINE_END(n) \ +.endm ; \ +int_define_ ## n n ; \ +do_define_int n + +.macro do_define_int name + .ifndef IVEC + .error "IVEC not defined" + .endif + .ifndef IHSRR + IHSRR=EXC_STD + .endif + .ifndef IAREA + IAREA=PACA_EXGEN + .endif + .ifndef IDAR + IDAR=0 + .endif + .ifndef IDSISR + IDSISR=0 + .endif + .ifndef ISET_RI + ISET_RI=1 + .endif + .ifndef IEARLY + IEARLY=0 + .endif + .ifndef IMASK + IMASK=0 + .endif + .ifndef IKVM_REAL + IKVM_REAL=0 + .endif + .ifndef IKVM_VIRT + IKVM_VIRT=0 + .endif +.endm + .macro INT_KVM_HANDLER name, vec, hsrr, area, skip TRAMP_KVM_BEGIN(\name\()_kvm) KVM_HANDLER \vec, \hsrr, \area, \skip @@ -474,7 +529,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) */ GET_SCRATCH0(r10) std r10,\area\()+EX_R13(r13) - .if \dar + .if \dar == 1 .if \hsrr mfspr r10,SPRN_HDAR .else @@ -482,7 +537,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endif std r10,\area\()+EX_DAR(r13) .endif - .if \dsisr + .if 
\dsisr == 1 .if \hsrr mfspr r10,SPRN_HDSISR .else @@ -506,6 +561,14 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endif .endm +.macro GEN_INT_ENTRY name, virt, ool=0 + .if ! \virt + INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, ISET_RI, IDAR, IDSISR, IMASK, IKVM_REAL + .else + INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, ISET_RI, IDAR, IDSISR, IMASK, IKVM_VIRT + .endif +.endm + /* * On entry r13 points to the paca, r9-r13 are saved in the paca, * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and @@ -1143,12 +1206,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) bl unrecoverable_exception b . +INT_DEFINE_BEGIN(data_access) + IVEC=0x300 + IDAR=1 + IDSISR=1 + IKVM_REAL=1 +INT_DEFINE_END(data_access) EXC_REAL_BEGIN(data_access, 0x300, 0x80) - INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY data_access, virt=0, ool=1 EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) - INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1 + GEN_INT_ENTRY data_access, virt=1 EXC_VIRT_END(data_access, 0x4300, 0x80) INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1 EXC_COMMON_BEGIN(data_access_common) -- cgit v1.2.3-59-g8ed1b From 7cb3a1a03e5f24b9f71a0f98e0dc9763155073ce Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:11 +1000 Subject: powerpc/64s/exception: Add GEN_COMMON macro that uses INT_DEFINE parameters No generated code change. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-3-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1b942c98bc05..f3f2ec88b3d8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -206,6 +206,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define IMASK .L_IMASK_\name\() #define IKVM_REAL .L_IKVM_REAL_\name\() #define IKVM_VIRT .L_IKVM_VIRT_\name\() +#define ISTACK .L_ISTACK_\name\() +#define IRECONCILE .L_IRECONCILE_\name\() +#define IKUAP .L_IKUAP_\name\() #define INT_DEFINE_BEGIN(n) \ .macro int_define_ ## n name @@ -246,6 +249,15 @@ do_define_int n .ifndef IKVM_VIRT IKVM_VIRT=0 .endif + .ifndef ISTACK + ISTACK=1 + .endif + .ifndef IRECONCILE + IRECONCILE=1 + .endif + .ifndef IKUAP + IKUAP=1 + .endif .endm .macro INT_KVM_HANDLER name, vec, hsrr, area, skip @@ -670,6 +682,10 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) .endif .endm +.macro GEN_COMMON name + INT_COMMON IVEC, IAREA, ISTACK, IKUAP, IRECONCILE, IDAR, IDSISR +.endm + /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. @@ -1221,13 +1237,7 @@ EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) EXC_VIRT_END(data_access, 0x4300, 0x80) INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1 EXC_COMMON_BEGIN(data_access_common) - /* - * Here r13 points to the paca, r9 contains the saved CR, - * SRR0 and SRR1 are saved in r11 and r12, - * r9 - r13 are saved in paca->exgen. 
- * EX_DAR and EX_DSISR have saved DAR/DSISR - */ - INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1 + GEN_COMMON data_access ld r4,_DAR(r1) ld r5,_DSISR(r1) BEGIN_MMU_FTR_SECTION -- cgit v1.2.3-59-g8ed1b From d52fd3d31b25a01a48e9b31c668846967cb1cfde Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:12 +1000 Subject: powerpc/64s/exception: Add GEN_KVM macro that uses INT_DEFINE parameters No generated code change. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-4-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f3f2ec88b3d8..da3c22eea72d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -204,6 +204,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define ISET_RI .L_ISET_RI_\name\() #define IEARLY .L_IEARLY_\name\() #define IMASK .L_IMASK_\name\() +#define IKVM_SKIP .L_IKVM_SKIP_\name\() #define IKVM_REAL .L_IKVM_REAL_\name\() #define IKVM_VIRT .L_IKVM_VIRT_\name\() #define ISTACK .L_ISTACK_\name\() @@ -243,6 +244,9 @@ do_define_int n .ifndef IMASK IMASK=0 .endif + .ifndef IKVM_SKIP + IKVM_SKIP=0 + .endif .ifndef IKVM_REAL IKVM_REAL=0 .endif @@ -265,6 +269,10 @@ do_define_int n KVM_HANDLER \vec, \hsrr, \area, \skip .endm +.macro GEN_KVM name + KVM_HANDLER IVEC, IHSRR, IAREA, IKVM_SKIP +.endm + #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* @@ -1226,6 +1234,7 @@ INT_DEFINE_BEGIN(data_access) IVEC=0x300 IDAR=1 IDSISR=1 + IKVM_SKIP=1 IKVM_REAL=1 INT_DEFINE_END(data_access) @@ -1235,7 +1244,8 @@ EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) GEN_INT_ENTRY data_access, virt=1 EXC_VIRT_END(data_access, 0x4300, 0x80) -INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1 +TRAMP_KVM_BEGIN(data_access_kvm) + 
GEN_KVM data_access EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DAR(r1) -- cgit v1.2.3-59-g8ed1b From eb204d863b9e7ddd9fdf904b1e94412597beb301 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:13 +1000 Subject: powerpc/64s/exception: Expand EXC_COMMON and EXC_COMMON_ASYNC macros These don't provide a large amount of code sharing. Removing them makes code easier to shuffle around. For example, some of the common instructions will be moved into the common code gen macro. No generated code change. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-5-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 160 +++++++++++++++++++++++++---------- 1 file changed, 117 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index da3c22eea72d..0f1da3099c28 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -757,28 +757,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define FINISH_NAP #endif -#define EXC_COMMON(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \ - bl save_nvgprs; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret_from_except - -/* - * Like EXC_COMMON, but for exceptions that can occur in the idle task and - * therefore need the special idle handling (finish nap and runlatch) - */ -#define EXC_COMMON_ASYNC(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \ - FINISH_NAP; \ - RUNLATCH_ON; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret_from_except_lite - - /* * There are a few constraints to be concerned with. * - Real mode exceptions code/data must be located at their physical location. 
@@ -1349,7 +1327,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) +EXC_COMMON_BEGIN(hardware_interrupt_common) + INT_COMMON 0x500, PACA_EXGEN, 1, 1, 1, 0, 0 + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_IRQ + b ret_from_except_lite EXC_REAL_BEGIN(alignment, 0x600, 0x100) @@ -1455,7 +1439,13 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED EXC_VIRT_END(decrementer, 0x4900, 0x80) INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) +EXC_COMMON_BEGIN(decrementer_common) + INT_COMMON 0x900, PACA_EXGEN, 1, 1, 1, 0, 0 + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl timer_interrupt + b ret_from_except_lite EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80) @@ -1465,7 +1455,12 @@ EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) +EXC_COMMON_BEGIN(hdecrementer_common) + INT_COMMON 0x980, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl hdec_interrupt + b ret_from_except EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) @@ -1475,11 +1470,17 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0 +EXC_COMMON_BEGIN(doorbell_super_common) + INT_COMMON 0xa00, PACA_EXGEN, 1, 1, 1, 0, 0 + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD #ifdef 
CONFIG_PPC_DOORBELL -EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) + bl doorbell_exception #else -EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception) + bl unknown_exception #endif + b ret_from_except_lite EXC_REAL_NONE(0xb00, 0x100) @@ -1610,7 +1611,12 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) INT_HANDLER single_step, 0xd00, virt=1 EXC_VIRT_END(single_step, 0x4d00, 0x100) INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON(single_step_common, 0xd00, single_step_exception) +EXC_COMMON_BEGIN(single_step_common) + INT_COMMON 0xd00, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl single_step_exception + b ret_from_except EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20) @@ -1641,7 +1647,12 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1 EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) +EXC_COMMON_BEGIN(h_instr_storage_common) + INT_COMMON 0xe20, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b ret_from_except EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20) @@ -1651,7 +1662,12 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1 EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) +EXC_COMMON_BEGIN(emulation_assist_common) + INT_COMMON 0xe40, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl emulation_assist_interrupt + b ret_from_except /* @@ -1708,11 +1724,17 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 EXC_VIRT_END(h_doorbell, 0x4e80, 
0x20) INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0 +EXC_COMMON_BEGIN(h_doorbell_common) + INT_COMMON 0xe80, PACA_EXGEN, 1, 1, 1, 0, 0 + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL -EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) + bl doorbell_exception #else -EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) + bl unknown_exception #endif + b ret_from_except_lite EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) @@ -1722,7 +1744,13 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) +EXC_COMMON_BEGIN(h_virt_irq_common) + INT_COMMON 0xea0, PACA_EXGEN, 1, 1, 1, 0, 0 + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_IRQ + b ret_from_except_lite EXC_REAL_NONE(0xec0, 0x20) @@ -1738,7 +1766,13 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) +EXC_COMMON_BEGIN(performance_monitor_common) + INT_COMMON 0xf00, PACA_EXGEN, 1, 1, 1, 0, 0 + FINISH_NAP + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl performance_monitor_exception + b ret_from_except_lite EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) @@ -1829,7 +1863,12 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1 EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0 -EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception) +EXC_COMMON_BEGIN(facility_unavailable_common) + INT_COMMON 
0xf60, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl facility_unavailable_exception + b ret_from_except EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20) @@ -1839,7 +1878,12 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1 EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0 -EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception) +EXC_COMMON_BEGIN(h_facility_unavailable_common) + INT_COMMON 0xf80, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl facility_unavailable_exception + b ret_from_except EXC_REAL_NONE(0xfa0, 0x20) @@ -1860,7 +1904,12 @@ EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1 -EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) +EXC_COMMON_BEGIN(cbe_system_error_common) + INT_COMMON 0x1200, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl cbe_system_error_exception + b ret_from_except #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) @@ -1874,7 +1923,12 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) INT_HANDLER instruction_breakpoint, 0x1300, virt=1 EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1 -EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception) +EXC_COMMON_BEGIN(instruction_breakpoint_common) + INT_COMMON 0x1300, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl instruction_breakpoint_exception + b ret_from_except EXC_REAL_NONE(0x1400, 0x100) @@ -1974,7 +2028,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) b . 
#endif -EXC_COMMON(denorm_common, 0x1500, unknown_exception) +EXC_COMMON_BEGIN(denorm_common) + INT_COMMON 0x1500, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b ret_from_except #ifdef CONFIG_CBE_RAS @@ -1983,7 +2042,12 @@ EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1 -EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) +EXC_COMMON_BEGIN(cbe_maintenance_common) + INT_COMMON 0x1600, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl cbe_maintenance_exception + b ret_from_except #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) @@ -1997,11 +2061,16 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) INT_HANDLER altivec_assist, 0x1700, virt=1 EXC_VIRT_END(altivec_assist, 0x5700, 0x100) INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0 +EXC_COMMON_BEGIN(altivec_assist_common) + INT_COMMON 0x1700, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC -EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) + bl altivec_assist_exception #else -EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception) + bl unknown_exception #endif + b ret_from_except #ifdef CONFIG_CBE_RAS @@ -2010,7 +2079,12 @@ EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1 -EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) +EXC_COMMON_BEGIN(cbe_thermal_common) + INT_COMMON 0x1800, PACA_EXGEN, 1, 1, 1, 0, 0 + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl cbe_thermal_exception + b ret_from_except #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -- cgit v1.2.3-59-g8ed1b From 
4f50541f6703b99cfe1ba16639740be851a619ae Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:14 +1000 Subject: powerpc/64s/exception: Move all interrupt handlers to new style code gen macros Aside from label names and BUG line numbers, the generated code change is an additional HMI KVM handler added for the "late" KVM handler, because early and late HMI generation is achieved by defining two different interrupt types. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-6-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 556 ++++++++++++++++++++++++++--------- 1 file changed, 418 insertions(+), 138 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0f1da3099c28..0157ba48efe9 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -206,8 +206,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define IMASK .L_IMASK_\name\() #define IKVM_SKIP .L_IKVM_SKIP_\name\() #define IKVM_REAL .L_IKVM_REAL_\name\() +#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name #define IKVM_VIRT .L_IKVM_VIRT_\name\() #define ISTACK .L_ISTACK_\name\() +#define __ISTACK(name) .L_ISTACK_ ## name #define IRECONCILE .L_IRECONCILE_\name\() #define IKUAP .L_IKUAP_\name\() @@ -570,7 +572,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) /* nothing more */ .elseif \early mfctr r10 /* save ctr, even for !RELOCATABLE */ - BRANCH_TO_C000(r11, \name\()_early_common) + BRANCH_TO_C000(r11, \name\()_common) .elseif !\virt INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri .else @@ -843,6 +845,19 @@ __start_interrupts: EXC_VIRT_NONE(0x4000, 0x100) +INT_DEFINE_BEGIN(system_reset) + IVEC=0x100 + IAREA=PACA_EXNMI + /* + * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is + * being used, so a nested NMI exception would corrupt it. 
+ */ + ISET_RI=0 + ISTACK=0 + IRECONCILE=0 + IKVM_REAL=1 +INT_DEFINE_END(system_reset) + EXC_REAL_BEGIN(system_reset, 0x100, 0x100) #ifdef CONFIG_PPC_P7_NAP /* @@ -880,11 +895,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif - INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1 + GEN_INT_ENTRY system_reset, virt=0 /* - * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is - * being used, so a nested NMI exception would corrupt it. - * * In theory, we should not enable relocation here if it was disabled * in SRR1, because the MMU may not be configured to support it (e.g., * SLB may have been cleared). In practice, there should only be a few @@ -893,7 +905,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) */ EXC_REAL_END(system_reset, 0x100, 0x100) EXC_VIRT_NONE(0x4100, 0x100) -INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0 +TRAMP_KVM_BEGIN(system_reset_kvm) + GEN_KVM system_reset #ifdef CONFIG_PPC_P7_NAP TRAMP_REAL_BEGIN(system_reset_idle_wake) @@ -908,8 +921,8 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) * Vectors for the FWNMI option. Share common code. 
*/ TRAMP_REAL_BEGIN(system_reset_fwnmi) - /* See comment at system_reset exception, don't turn on RI */ - INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0 + __IKVM_REAL(system_reset)=0 + GEN_INT_ENTRY system_reset, virt=0 #endif /* CONFIG_PPC_PSERIES */ @@ -929,7 +942,7 @@ EXC_COMMON_BEGIN(system_reset_common) mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE - INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0 + GEN_COMMON system_reset bl save_nvgprs /* * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does @@ -971,23 +984,46 @@ EXC_COMMON_BEGIN(system_reset_common) RFI_TO_USER_OR_KERNEL -EXC_REAL_BEGIN(machine_check, 0x200, 0x100) - INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1 +INT_DEFINE_BEGIN(machine_check_early) + IVEC=0x200 + IAREA=PACA_EXMC /* * MSR_RI is not enabled, because PACA_EXMC is being used, so a * nested machine check corrupts it. machine_check_common enables * MSR_RI. */ + ISET_RI=0 + ISTACK=0 + IEARLY=1 + IDAR=1 + IDSISR=1 + IRECONCILE=0 + IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ +INT_DEFINE_END(machine_check_early) + +INT_DEFINE_BEGIN(machine_check) + IVEC=0x200 + IAREA=PACA_EXMC + ISET_RI=0 + IDAR=1 + IDSISR=1 + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(machine_check) + +EXC_REAL_BEGIN(machine_check, 0x200, 0x100) + GEN_INT_ENTRY machine_check_early, virt=0 EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) #ifdef CONFIG_PPC_PSERIES TRAMP_REAL_BEGIN(machine_check_fwnmi) /* See comment at machine_check exception, don't turn on RI */ - INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1 + GEN_INT_ENTRY machine_check_early, virt=0 #endif -INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1 +TRAMP_KVM_BEGIN(machine_check_kvm) + GEN_KVM machine_check #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. 
*/\ @@ -1039,8 +1075,7 @@ EXC_COMMON_BEGIN(machine_check_early_common) bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - /* We don't touch AMR here, we never go to virtual mode */ - INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1 + GEN_COMMON machine_check_early BEGIN_FTR_SECTION bl enable_machine_check @@ -1128,15 +1163,15 @@ BEGIN_FTR_SECTION mtspr SPRN_CFAR,r10 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) MACHINE_CHECK_HANDLER_WINDUP - /* See comment at machine_check exception, don't turn on RI */ - INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY machine_check, virt=0 EXC_COMMON_BEGIN(machine_check_common) /* * Machine check is different because we use a different * save area: PACA_EXMC instead of PACA_EXGEN. */ - INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1 + GEN_COMMON machine_check + FINISH_NAP /* Enable MSR_RI when finished with PACA_EXMC */ li r10,MSR_RI @@ -1208,6 +1243,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) bl unrecoverable_exception b . + +/** + * 0x300 - Data Storage Interrupt (DSI) + * This interrupt is generated due to a data access which does not have a valid + * page table entry with permissions to allow the data access to be performed. + * DAWR matches also fault here, as do RC updates, and minor misc errors e.g., + * copy/paste, AMO, certain invalid CI accesses, etc. + * + * This interrupt is delivered to the guest (HV bit unchanged). + * + * Linux HPT responds by first attempting to refill the hash table from the + * Linux page table, then going to a full page fault if the Linux page table + * entry was insufficient. RPT goes straight to full page fault. + * + * PR KVM ...? 
+ */ INT_DEFINE_BEGIN(data_access) IVEC=0x300 IDAR=1 @@ -1237,15 +1288,25 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +INT_DEFINE_BEGIN(data_access_slb) + IVEC=0x380 + IAREA=PACA_EXSLB + IRECONCILE=0 + IDAR=1 + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(data_access_slb) + EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) - INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1 + GEN_INT_ENTRY data_access_slb, virt=0, ool=1 EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) - INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1 + GEN_INT_ENTRY data_access_slb, virt=1 EXC_VIRT_END(data_access_slb, 0x4380, 0x80) -INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1 +TRAMP_KVM_BEGIN(data_access_slb_kvm) + GEN_KVM data_access_slb EXC_COMMON_BEGIN(data_access_slb_common) - INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0 + GEN_COMMON data_access_slb ld r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION @@ -1269,15 +1330,23 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) b ret_from_except +INT_DEFINE_BEGIN(instruction_access) + IVEC=0x400 + IDAR=2 + IDSISR=2 + IKVM_REAL=1 +INT_DEFINE_END(instruction_access) + EXC_REAL_BEGIN(instruction_access, 0x400, 0x80) - INT_HANDLER instruction_access, 0x400, kvm=1 + GEN_INT_ENTRY instruction_access, virt=0 EXC_REAL_END(instruction_access, 0x400, 0x80) EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) - INT_HANDLER instruction_access, 0x400, virt=1 + GEN_INT_ENTRY instruction_access, virt=1 EXC_VIRT_END(instruction_access, 0x4400, 0x80) -INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(instruction_access_kvm) + GEN_KVM instruction_access EXC_COMMON_BEGIN(instruction_access_common) - INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2 + GEN_COMMON instruction_access ld r4,_DAR(r1) ld r5,_DSISR(r1) BEGIN_MMU_FTR_SECTION @@ -1289,15 +1358,24 @@ MMU_FTR_SECTION_ELSE 
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +INT_DEFINE_BEGIN(instruction_access_slb) + IVEC=0x480 + IAREA=PACA_EXSLB + IRECONCILE=0 + IDAR=2 + IKVM_REAL=1 +INT_DEFINE_END(instruction_access_slb) + EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) - INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1 + GEN_INT_ENTRY instruction_access_slb, virt=0 EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) - INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB + GEN_INT_ENTRY instruction_access_slb, virt=1 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) -INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0 +TRAMP_KVM_BEGIN(instruction_access_slb_kvm) + GEN_KVM instruction_access_slb EXC_COMMON_BEGIN(instruction_access_slb_common) - INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0 + GEN_COMMON instruction_access_slb ld r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION @@ -1320,15 +1398,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) bl do_bad_slb_fault b ret_from_except +INT_DEFINE_BEGIN(hardware_interrupt) + IVEC=0x500 + IHSRR=EXC_HV_OR_STD + IMASK=IRQS_DISABLED + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(hardware_interrupt) + EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) - INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY hardware_interrupt, virt=0 EXC_REAL_END(hardware_interrupt, 0x500, 0x100) EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) - INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY hardware_interrupt, virt=1 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) -INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(hardware_interrupt_kvm) + GEN_KVM hardware_interrupt EXC_COMMON_BEGIN(hardware_interrupt_common) - INT_COMMON 0x500, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON hardware_interrupt 
FINISH_NAP RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD @@ -1336,28 +1423,42 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) b ret_from_except_lite +INT_DEFINE_BEGIN(alignment) + IVEC=0x600 + IDAR=1 + IDSISR=1 + IKVM_REAL=1 +INT_DEFINE_END(alignment) + EXC_REAL_BEGIN(alignment, 0x600, 0x100) - INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY alignment, virt=0 EXC_REAL_END(alignment, 0x600, 0x100) EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) - INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1 + GEN_INT_ENTRY alignment, virt=1 EXC_VIRT_END(alignment, 0x4600, 0x100) -INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(alignment_kvm) + GEN_KVM alignment EXC_COMMON_BEGIN(alignment_common) - INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1 + GEN_COMMON alignment bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception b ret_from_except +INT_DEFINE_BEGIN(program_check) + IVEC=0x700 + IKVM_REAL=1 +INT_DEFINE_END(program_check) + EXC_REAL_BEGIN(program_check, 0x700, 0x100) - INT_HANDLER program_check, 0x700, kvm=1 + GEN_INT_ENTRY program_check, virt=0 EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) - INT_HANDLER program_check, 0x700, virt=1 + GEN_INT_ENTRY program_check, virt=1 EXC_VIRT_END(program_check, 0x4700, 0x100) -INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(program_check_kvm) + GEN_KVM program_check EXC_COMMON_BEGIN(program_check_common) /* * It's possible to receive a TM Bad Thing type program check with @@ -1383,10 +1484,12 @@ EXC_COMMON_BEGIN(program_check_common) mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0 + __ISTACK(program_check)=0 + GEN_COMMON program_check b 3f 2: - INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0 + __ISTACK(program_check)=1 + GEN_COMMON program_check 3: bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD @@ 
-1394,15 +1497,22 @@ EXC_COMMON_BEGIN(program_check_common) b ret_from_except +INT_DEFINE_BEGIN(fp_unavailable) + IVEC=0x800 + IRECONCILE=0 + IKVM_REAL=1 +INT_DEFINE_END(fp_unavailable) + EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100) - INT_HANDLER fp_unavailable, 0x800, kvm=1 + GEN_INT_ENTRY fp_unavailable, virt=0 EXC_REAL_END(fp_unavailable, 0x800, 0x100) EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100) - INT_HANDLER fp_unavailable, 0x800, virt=1 + GEN_INT_ENTRY fp_unavailable, virt=1 EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) -INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(fp_unavailable_kvm) + GEN_KVM fp_unavailable EXC_COMMON_BEGIN(fp_unavailable_common) - INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0 + GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) @@ -1432,15 +1542,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif +INT_DEFINE_BEGIN(decrementer) + IVEC=0x900 + IMASK=IRQS_DISABLED + IKVM_REAL=1 +INT_DEFINE_END(decrementer) + EXC_REAL_BEGIN(decrementer, 0x900, 0x80) - INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY decrementer, virt=0, ool=1 EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) - INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED + GEN_INT_ENTRY decrementer, virt=1 EXC_VIRT_END(decrementer, 0x4900, 0x80) -INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(decrementer_kvm) + GEN_KVM decrementer EXC_COMMON_BEGIN(decrementer_common) - INT_COMMON 0x900, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON decrementer FINISH_NAP RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD @@ -1448,30 +1565,45 @@ EXC_COMMON_BEGIN(decrementer_common) b ret_from_except_lite +INT_DEFINE_BEGIN(hdecrementer) + IVEC=0x980 + IHSRR=EXC_HV + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(hdecrementer) + EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80) - INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1 + 
GEN_INT_ENTRY hdecrementer, virt=0 EXC_REAL_END(hdecrementer, 0x980, 0x80) EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) - INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY hdecrementer, virt=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) -INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(hdecrementer_kvm) + GEN_KVM hdecrementer EXC_COMMON_BEGIN(hdecrementer_common) - INT_COMMON 0x980, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON hdecrementer bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl hdec_interrupt b ret_from_except +INT_DEFINE_BEGIN(doorbell_super) + IVEC=0xa00 + IMASK=IRQS_DISABLED + IKVM_REAL=1 +INT_DEFINE_END(doorbell_super) + EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) - INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY doorbell_super, virt=0 EXC_REAL_END(doorbell_super, 0xa00, 0x100) EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) - INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED + GEN_INT_ENTRY doorbell_super, virt=1 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) -INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(doorbell_super_kvm) + GEN_KVM doorbell_super EXC_COMMON_BEGIN(doorbell_super_common) - INT_COMMON 0xa00, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON doorbell_super FINISH_NAP RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD @@ -1604,30 +1736,47 @@ TRAMP_KVM_BEGIN(system_call_kvm) #endif +INT_DEFINE_BEGIN(single_step) + IVEC=0xd00 + IKVM_REAL=1 +INT_DEFINE_END(single_step) + EXC_REAL_BEGIN(single_step, 0xd00, 0x100) - INT_HANDLER single_step, 0xd00, kvm=1 + GEN_INT_ENTRY single_step, virt=0 EXC_REAL_END(single_step, 0xd00, 0x100) EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) - INT_HANDLER single_step, 0xd00, virt=1 + GEN_INT_ENTRY single_step, virt=1 EXC_VIRT_END(single_step, 0x4d00, 0x100) -INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(single_step_kvm) + GEN_KVM single_step EXC_COMMON_BEGIN(single_step_common) - 
INT_COMMON 0xd00, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON single_step bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl single_step_exception b ret_from_except +INT_DEFINE_BEGIN(h_data_storage) + IVEC=0xe00 + IHSRR=EXC_HV + IDAR=1 + IDSISR=1 + IKVM_SKIP=1 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_data_storage) + EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20) - INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY h_data_storage, virt=0, ool=1 EXC_REAL_END(h_data_storage, 0xe00, 0x20) EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) - INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1 + GEN_INT_ENTRY h_data_storage, virt=1, ool=1 EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) -INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1 +TRAMP_KVM_BEGIN(h_data_storage_kvm) + GEN_KVM h_data_storage EXC_COMMON_BEGIN(h_data_storage_common) - INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1 + GEN_COMMON h_data_storage bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION @@ -1640,30 +1789,46 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b ret_from_except +INT_DEFINE_BEGIN(h_instr_storage) + IVEC=0xe20 + IHSRR=EXC_HV + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_instr_storage) + EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20) - INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_instr_storage, virt=0, ool=1 EXC_REAL_END(h_instr_storage, 0xe20, 0x20) EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) - INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_instr_storage, virt=1, ool=1 EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) -INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(h_instr_storage_kvm) + GEN_KVM h_instr_storage EXC_COMMON_BEGIN(h_instr_storage_common) - INT_COMMON 0xe20, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON h_instr_storage bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception b 
ret_from_except +INT_DEFINE_BEGIN(emulation_assist) + IVEC=0xe40 + IHSRR=EXC_HV + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(emulation_assist) + EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20) - INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY emulation_assist, virt=0, ool=1 EXC_REAL_END(emulation_assist, 0xe40, 0x20) EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) - INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY emulation_assist, virt=1, ool=1 EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) -INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(emulation_assist_kvm) + GEN_KVM emulation_assist EXC_COMMON_BEGIN(emulation_assist_common) - INT_COMMON 0xe40, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON emulation_assist bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl emulation_assist_interrupt @@ -1675,11 +1840,32 @@ EXC_COMMON_BEGIN(emulation_assist_common) * first, and then eventaully from there to the trampoline to get into virtual * mode. 
*/ +INT_DEFINE_BEGIN(hmi_exception_early) + IVEC=0xe60 + IHSRR=EXC_HV + IEARLY=1 + ISTACK=0 + IRECONCILE=0 + IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ + IKVM_REAL=1 +INT_DEFINE_END(hmi_exception_early) + +INT_DEFINE_BEGIN(hmi_exception) + IVEC=0xe60 + IHSRR=EXC_HV + IMASK=IRQS_DISABLED + IKVM_REAL=1 +INT_DEFINE_END(hmi_exception) + EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20) - INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1 + GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1 EXC_REAL_END(hmi_exception, 0xe60, 0x20) EXC_VIRT_NONE(0x4e60, 0x20) -INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(hmi_exception_early_kvm) + GEN_KVM hmi_exception_early +TRAMP_KVM_BEGIN(hmi_exception_kvm) + GEN_KVM hmi_exception + EXC_COMMON_BEGIN(hmi_exception_early_common) mtctr r10 /* Restore ctr */ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ @@ -1688,8 +1874,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - /* We don't touch AMR here, we never go to virtual mode */ - INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0 + GEN_COMMON hmi_exception_early addi r3,r1,STACK_FRAME_OVERHEAD bl hmi_exception_realmode @@ -1705,10 +1890,10 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) * firmware. 
*/ EXCEPTION_RESTORE_REGS EXC_HV - INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY hmi_exception, virt=0 EXC_COMMON_BEGIN(hmi_exception_common) - INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON hmi_exception FINISH_NAP RUNLATCH_ON bl save_nvgprs @@ -1717,15 +1902,24 @@ EXC_COMMON_BEGIN(hmi_exception_common) b ret_from_except +INT_DEFINE_BEGIN(h_doorbell) + IVEC=0xe80 + IHSRR=EXC_HV + IMASK=IRQS_DISABLED + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_doorbell) + EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20) - INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_doorbell, virt=0, ool=1 EXC_REAL_END(h_doorbell, 0xe80, 0x20) EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) - INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_doorbell, virt=1, ool=1 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) -INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(h_doorbell_kvm) + GEN_KVM h_doorbell EXC_COMMON_BEGIN(h_doorbell_common) - INT_COMMON 0xe80, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON h_doorbell FINISH_NAP RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD @@ -1737,15 +1931,24 @@ EXC_COMMON_BEGIN(h_doorbell_common) b ret_from_except_lite +INT_DEFINE_BEGIN(h_virt_irq) + IVEC=0xea0 + IHSRR=EXC_HV + IMASK=IRQS_DISABLED + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(h_virt_irq) + EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) - INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_virt_irq, virt=0, ool=1 EXC_REAL_END(h_virt_irq, 0xea0, 0x20) EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) - INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1 + GEN_INT_ENTRY h_virt_irq, virt=1, ool=1 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) -INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(h_virt_irq_kvm) + GEN_KVM h_virt_irq EXC_COMMON_BEGIN(h_virt_irq_common) - 
INT_COMMON 0xea0, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON h_virt_irq FINISH_NAP RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD @@ -1759,15 +1962,22 @@ EXC_REAL_NONE(0xee0, 0x20) EXC_VIRT_NONE(0x4ee0, 0x20) +INT_DEFINE_BEGIN(performance_monitor) + IVEC=0xf00 + IMASK=IRQS_PMI_DISABLED + IKVM_REAL=1 +INT_DEFINE_END(performance_monitor) + EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) - INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1 + GEN_INT_ENTRY performance_monitor, virt=0, ool=1 EXC_REAL_END(performance_monitor, 0xf00, 0x20) EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) - INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED + GEN_INT_ENTRY performance_monitor, virt=1, ool=1 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) -INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(performance_monitor_kvm) + GEN_KVM performance_monitor EXC_COMMON_BEGIN(performance_monitor_common) - INT_COMMON 0xf00, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON performance_monitor FINISH_NAP RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD @@ -1775,15 +1985,22 @@ EXC_COMMON_BEGIN(performance_monitor_common) b ret_from_except_lite +INT_DEFINE_BEGIN(altivec_unavailable) + IVEC=0xf20 + IRECONCILE=0 + IKVM_REAL=1 +INT_DEFINE_END(altivec_unavailable) + EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) - INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1 + GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1 EXC_REAL_END(altivec_unavailable, 0xf20, 0x20) EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20) - INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1 + GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1 EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20) -INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(altivec_unavailable_kvm) + GEN_KVM altivec_unavailable EXC_COMMON_BEGIN(altivec_unavailable_common) - INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0 + GEN_COMMON altivec_unavailable 
#ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION beq 1f @@ -1816,15 +2033,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) b ret_from_except +INT_DEFINE_BEGIN(vsx_unavailable) + IVEC=0xf40 + IRECONCILE=0 + IKVM_REAL=1 +INT_DEFINE_END(vsx_unavailable) + EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20) - INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1 + GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1 EXC_REAL_END(vsx_unavailable, 0xf40, 0x20) EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20) - INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1 + GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1 EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20) -INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(vsx_unavailable_kvm) + GEN_KVM vsx_unavailable EXC_COMMON_BEGIN(vsx_unavailable_common) - INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0 + GEN_COMMON vsx_unavailable #ifdef CONFIG_VSX BEGIN_FTR_SECTION beq 1f @@ -1856,30 +2080,44 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) b ret_from_except +INT_DEFINE_BEGIN(facility_unavailable) + IVEC=0xf60 + IKVM_REAL=1 +INT_DEFINE_END(facility_unavailable) + EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20) - INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1 + GEN_INT_ENTRY facility_unavailable, virt=0, ool=1 EXC_REAL_END(facility_unavailable, 0xf60, 0x20) EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) - INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1 + GEN_INT_ENTRY facility_unavailable, virt=1, ool=1 EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) -INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(facility_unavailable_kvm) + GEN_KVM facility_unavailable EXC_COMMON_BEGIN(facility_unavailable_common) - INT_COMMON 0xf60, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON facility_unavailable bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception b ret_from_except +INT_DEFINE_BEGIN(h_facility_unavailable) + IVEC=0xf80 + IHSRR=EXC_HV + IKVM_REAL=1 + IKVM_VIRT=1 
+INT_DEFINE_END(h_facility_unavailable) + EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20) - INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1 EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20) EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) - INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1 EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) -INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(h_facility_unavailable_kvm) + GEN_KVM h_facility_unavailable EXC_COMMON_BEGIN(h_facility_unavailable_common) - INT_COMMON 0xf80, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON h_facility_unavailable bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception @@ -1899,13 +2137,21 @@ EXC_REAL_NONE(0x1100, 0x100) EXC_VIRT_NONE(0x5100, 0x100) #ifdef CONFIG_CBE_RAS +INT_DEFINE_BEGIN(cbe_system_error) + IVEC=0x1200 + IHSRR=EXC_HV + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(cbe_system_error) + EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) - INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY cbe_system_error, virt=0 EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) -INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1 +TRAMP_KVM_BEGIN(cbe_system_error_kvm) + GEN_KVM cbe_system_error EXC_COMMON_BEGIN(cbe_system_error_common) - INT_COMMON 0x1200, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON cbe_system_error bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_system_error_exception @@ -1916,15 +2162,22 @@ EXC_VIRT_NONE(0x5200, 0x100) #endif +INT_DEFINE_BEGIN(instruction_breakpoint) + IVEC=0x1300 + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(instruction_breakpoint) + EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100) - INT_HANDLER instruction_breakpoint, 0x1300, kvm=1 + GEN_INT_ENTRY instruction_breakpoint, virt=0 
EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100) EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) - INT_HANDLER instruction_breakpoint, 0x1300, virt=1 + GEN_INT_ENTRY instruction_breakpoint, virt=1 EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) -INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1 +TRAMP_KVM_BEGIN(instruction_breakpoint_kvm) + GEN_KVM instruction_breakpoint EXC_COMMON_BEGIN(instruction_breakpoint_common) - INT_COMMON 0x1300, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON instruction_breakpoint bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl instruction_breakpoint_exception @@ -1934,30 +2187,35 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) -EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) - INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV +INT_DEFINE_BEGIN(denorm_exception) + IVEC=0x1500 + IHSRR=EXC_HV + IEARLY=2 +INT_DEFINE_END(denorm_exception) + +EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) + GEN_INT_ENTRY denorm_exception, virt=0 #ifdef CONFIG_PPC_DENORMALISATION mfspr r10,SPRN_HSRR1 andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ bne+ denorm_assist #endif - KVMTEST denorm_exception_hv, EXC_HV 0x1500 - INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1 -EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) - + KVMTEST denorm_exception, EXC_HV, 0x1500 + INT_SAVE_SRR_AND_JUMP denorm_exception_common, EXC_HV, 1 +EXC_REAL_END(denorm_exception, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) - INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0 + GEN_INT_ENTRY denorm_exception, virt=1 mfspr r10,SPRN_HSRR1 andis. r10,r10,(HSRR1_DENORM)@h /* denorm? 
*/ bne+ denorm_assist - INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV + INT_VIRT_SAVE_SRR_AND_JUMP denorm_exception_common, EXC_HV EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else EXC_VIRT_NONE(0x5500, 0x100) #endif - -INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(denorm_exception_kvm) + GEN_KVM denorm_exception #ifdef CONFIG_PPC_DENORMALISATION TRAMP_REAL_BEGIN(denorm_assist) @@ -2028,8 +2286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) b . #endif -EXC_COMMON_BEGIN(denorm_common) - INT_COMMON 0x1500, PACA_EXGEN, 1, 1, 1, 0, 0 +EXC_COMMON_BEGIN(denorm_exception_common) + GEN_COMMON denorm_exception bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception @@ -2037,13 +2295,21 @@ EXC_COMMON_BEGIN(denorm_common) #ifdef CONFIG_CBE_RAS +INT_DEFINE_BEGIN(cbe_maintenance) + IVEC=0x1600 + IHSRR=EXC_HV + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(cbe_maintenance) + EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) - INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY cbe_maintenance, virt=0 EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) -INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1 +TRAMP_KVM_BEGIN(cbe_maintenance_kvm) + GEN_KVM cbe_maintenance EXC_COMMON_BEGIN(cbe_maintenance_common) - INT_COMMON 0x1600, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON cbe_maintenance bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_maintenance_exception @@ -2054,15 +2320,21 @@ EXC_VIRT_NONE(0x5600, 0x100) #endif +INT_DEFINE_BEGIN(altivec_assist) + IVEC=0x1700 + IKVM_REAL=1 +INT_DEFINE_END(altivec_assist) + EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100) - INT_HANDLER altivec_assist, 0x1700, kvm=1 + GEN_INT_ENTRY altivec_assist, virt=0 EXC_REAL_END(altivec_assist, 0x1700, 0x100) EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) - INT_HANDLER altivec_assist, 0x1700, virt=1 + GEN_INT_ENTRY altivec_assist, virt=1 EXC_VIRT_END(altivec_assist, 0x5700, 0x100) -INT_KVM_HANDLER 
altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0 +TRAMP_KVM_BEGIN(altivec_assist_kvm) + GEN_KVM altivec_assist EXC_COMMON_BEGIN(altivec_assist_common) - INT_COMMON 0x1700, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON altivec_assist bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC @@ -2074,13 +2346,21 @@ EXC_COMMON_BEGIN(altivec_assist_common) #ifdef CONFIG_CBE_RAS +INT_DEFINE_BEGIN(cbe_thermal) + IVEC=0x1800 + IHSRR=EXC_HV + IKVM_SKIP=1 + IKVM_REAL=1 +INT_DEFINE_END(cbe_thermal) + EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) - INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1 + GEN_INT_ENTRY cbe_thermal, virt=0 EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1 +TRAMP_KVM_BEGIN(cbe_thermal_kvm) + GEN_KVM cbe_thermal EXC_COMMON_BEGIN(cbe_thermal_common) - INT_COMMON 0x1800, PACA_EXGEN, 1, 1, 1, 0, 0 + GEN_COMMON cbe_thermal bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_thermal_exception -- cgit v1.2.3-59-g8ed1b From fc589ee416e0b3e704840fb6d5895c29355d6f5a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:15 +1000 Subject: powerpc/64s/exception: Remove old INT_ENTRY macro Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-7-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 68 ++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0157ba48efe9..74bf6e0bf61f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -482,13 +482,13 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * - Fall through and continue executing in real, unrelocated mode. * This is done if early=2. 
*/ -.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0 +.macro GEN_INT_ENTRY name, virt, ool=0 SET_SCRATCH0(r13) /* save r13 */ GET_PACA(r13) - std r9,\area\()+EX_R9(r13) /* save r9 */ + std r9,IAREA+EX_R9(r13) /* save r9 */ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR) HMT_MEDIUM - std r10,\area\()+EX_R10(r13) /* save r10 - r12 */ + std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) .if \ool .if !\virt @@ -502,47 +502,47 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endif .endif - OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR) - OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR) + OPT_SAVE_REG_TO_PACA(IAREA+EX_PPR, r9, CPU_FTR_HAS_PPR) + OPT_SAVE_REG_TO_PACA(IAREA+EX_CFAR, r10, CPU_FTR_CFAR) INTERRUPT_TO_KERNEL - SAVE_CTR(r10, \area\()) + SAVE_CTR(r10, IAREA) mfcr r9 - .if \kvm - KVMTEST \name \hsrr \vec + .if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT) + KVMTEST \name IHSRR IVEC .endif - .if \bitmask + .if IMASK lbz r10,PACAIRQSOFTMASK(r13) - andi. r10,r10,\bitmask + andi. 
r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ - .if \vec == 0x500 || \vec == 0xea0 + .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE - .elseif \vec == 0x900 + .elseif IVEC == 0x900 li r10,PACA_IRQ_DEC - .elseif \vec == 0xa00 || \vec == 0xe80 + .elseif IVEC == 0xa00 || IVEC == 0xe80 li r10,PACA_IRQ_DBELL - .elseif \vec == 0xe60 + .elseif IVEC == 0xe60 li r10,PACA_IRQ_HMI - .elseif \vec == 0xf00 + .elseif IVEC == 0xf00 li r10,PACA_IRQ_PMI .else .abort "Bad maskable vector" .endif - .if \hsrr == EXC_HV_OR_STD + .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION bne masked_Hinterrupt FTR_SECTION_ELSE bne masked_interrupt ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr + .elseif IHSRR bne masked_Hinterrupt .else bne masked_interrupt .endif .endif - std r11,\area\()+EX_R11(r13) - std r12,\area\()+EX_R12(r13) + std r11,IAREA+EX_R11(r13) + std r12,IAREA+EX_R12(r13) /* * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI], @@ -550,47 +550,39 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * not recoverable if they are live. 
*/ GET_SCRATCH0(r10) - std r10,\area\()+EX_R13(r13) - .if \dar == 1 - .if \hsrr + std r10,IAREA+EX_R13(r13) + .if IDAR == 1 + .if IHSRR mfspr r10,SPRN_HDAR .else mfspr r10,SPRN_DAR .endif - std r10,\area\()+EX_DAR(r13) + std r10,IAREA+EX_DAR(r13) .endif - .if \dsisr == 1 - .if \hsrr + .if IDSISR == 1 + .if IHSRR mfspr r10,SPRN_HDSISR .else mfspr r10,SPRN_DSISR .endif - stw r10,\area\()+EX_DSISR(r13) + stw r10,IAREA+EX_DSISR(r13) .endif - .if \early == 2 + .if IEARLY == 2 /* nothing more */ - .elseif \early + .elseif IEARLY mfctr r10 /* save ctr, even for !RELOCATABLE */ BRANCH_TO_C000(r11, \name\()_common) .elseif !\virt - INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri + INT_SAVE_SRR_AND_JUMP \name\()_common, IHSRR, ISET_RI .else - INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr + INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, IHSRR .endif .if \ool .popsection .endif .endm -.macro GEN_INT_ENTRY name, virt, ool=0 - .if ! \virt - INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, ISET_RI, IDAR, IDSISR, IMASK, IKVM_REAL - .else - INT_HANDLER \name, IVEC, \ool, IEARLY, \virt, IHSRR, IAREA, ISET_RI, IDAR, IDSISR, IMASK, IKVM_VIRT - .endif -.endm - /* * On entry r13 points to the paca, r9-r13 are saved in the paca, * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and -- cgit v1.2.3-59-g8ed1b From 6d71759a741362697cde1da8735c3e2c23ff5d5c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:16 +1000 Subject: powerpc/64s/exception: Remove old INT_COMMON macro Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-8-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 51 +++++++++++++++++------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 74bf6e0bf61f..90514766dc7d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ 
b/arch/powerpc/kernel/exceptions-64s.S @@ -591,8 +591,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * If stack=0, then the stack is already set in r1, and r1 is saved in r10. * PPR save and CPU accounting is not done for the !stack case (XXX why not?) */ -.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr - .if \stack +.macro GEN_COMMON name + .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ @@ -609,54 +609,54 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ - .if \stack - .if \kuap + .if ISTACK + .if IKUAP kuap_save_amr_and_lock r9, r10, cr1, cr0 .endif beq 101f /* if from kernel mode */ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10) - SAVE_PPR(\area, r9) + SAVE_PPR(IAREA, r9) 101: .else - .if \kuap + .if IKUAP kuap_save_amr_and_lock r9, r10, cr1 .endif .endif /* Save original regs values from save area to stack frame. 
*/ - ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */ - ld r10,\area+EX_R10(r13) + ld r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe */ + ld r10,IAREA+EX_R10(r13) std r9,GPR9(r1) std r10,GPR10(r1) - ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */ - ld r10,\area+EX_R12(r13) - ld r11,\area+EX_R13(r13) + ld r9,IAREA+EX_R11(r13) /* move r11 - r13 to stackframe */ + ld r10,IAREA+EX_R12(r13) + ld r11,IAREA+EX_R13(r13) std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) - .if \dar - .if \dar == 2 + .if IDAR + .if IDAR == 2 ld r10,_NIP(r1) .else - ld r10,\area+EX_DAR(r13) + ld r10,IAREA+EX_DAR(r13) .endif std r10,_DAR(r1) .endif - .if \dsisr - .if \dsisr == 2 + .if IDSISR + .if IDSISR == 2 ld r10,_MSR(r1) lis r11,DSISR_SRR1_MATCH_64S@h and r10,r10,r11 .else - lwz r10,\area+EX_DSISR(r13) + lwz r10,IAREA+EX_DSISR(r13) .endif std r10,_DSISR(r1) .endif BEGIN_FTR_SECTION_NESTED(66) - ld r10,\area+EX_CFAR(r13) + ld r10,IAREA+EX_CFAR(r13) std r10,ORIG_GPR3(r1) END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) - GET_CTR(r10, \area) + GET_CTR(r10, IAREA) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ @@ -668,26 +668,22 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) mfspr r11,SPRN_XER /* save XER in stackframe */ std r10,SOFTE(r1) std r11,_XER(r1) - li r9,(\vec)+1 + li r9,(IVEC)+1 std r9,_TRAP(r1) /* set trap number */ li r10,0 ld r11,exception_marker@toc(r2) std r10,RESULT(r1) /* clear regs->result */ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ - .if \stack + .if ISTACK ACCOUNT_STOLEN_TIME .endif - .if \reconcile + .if IRECONCILE RECONCILE_IRQ_STATE(r10, r11) .endif .endm -.macro GEN_COMMON name - INT_COMMON IVEC, IAREA, ISTACK, IKUAP, IRECONCILE, IDAR, IDSISR -.endm - /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. 
@@ -2387,7 +2383,8 @@ EXC_COMMON_BEGIN(soft_nmi_common) mr r10,r1 ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE - INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0 + __ISTACK(decrementer)=0 + GEN_COMMON decrementer bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt -- cgit v1.2.3-59-g8ed1b From b177ae2f8c7c980248fb1edb22e8ab1b0db028af Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:17 +1000 Subject: powerpc/64s/exception: Remove old INT_KVM_HANDLER Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-9-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 55 +++++++++++++++++------------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 90514766dc7d..cba99f9a815b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -266,15 +266,6 @@ do_define_int n .endif .endm -.macro INT_KVM_HANDLER name, vec, hsrr, area, skip - TRAMP_KVM_BEGIN(\name\()_kvm) - KVM_HANDLER \vec, \hsrr, \area, \skip -.endm - -.macro GEN_KVM name - KVM_HANDLER IVEC, IHSRR, IAREA, IKVM_SKIP -.endm - #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* @@ -293,35 +284,35 @@ do_define_int n bne \name\()_kvm .endm -.macro KVM_HANDLER vec, hsrr, area, skip - .if \skip +.macro GEN_KVM name + .if IKVM_SKIP cmpwi r10,KVM_GUEST_MODE_SKIP beq 89f .else BEGIN_FTR_SECTION_NESTED(947) - ld r10,\area+EX_CFAR(r13) + ld r10,IAREA+EX_CFAR(r13) std r10,HSTATE_CFAR(r13) END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) .endif BEGIN_FTR_SECTION_NESTED(948) - ld r10,\area+EX_PPR(r13) + ld r10,IAREA+EX_PPR(r13) std r10,HSTATE_PPR(r13) END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) - ld r10,\area+EX_R10(r13) + ld r10,IAREA+EX_R10(r13) std r12,HSTATE_SCRATCH0(r13) sldi r12,r9,32 /* HSRR variants have the 0x2 bit added to their 
trap number */ - .if \hsrr == EXC_HV_OR_STD + .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION - ori r12,r12,(\vec + 0x2) + ori r12,r12,(IVEC + 0x2) FTR_SECTION_ELSE - ori r12,r12,(\vec) + ori r12,r12,(IVEC) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - ori r12,r12,(\vec + 0x2) + .elseif IHSRR + ori r12,r12,(IVEC+ 0x2) .else - ori r12,r12,(\vec) + ori r12,r12,(IVEC) .endif #ifdef CONFIG_RELOCATABLE @@ -334,25 +325,25 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r9,HSTATE_SCRATCH1(r13) __LOAD_FAR_HANDLER(r9, kvmppc_interrupt) mtctr r9 - ld r9,\area+EX_R9(r13) + ld r9,IAREA+EX_R9(r13) bctr #else - ld r9,\area+EX_R9(r13) + ld r9,IAREA+EX_R9(r13) b kvmppc_interrupt #endif - .if \skip + .if IKVM_SKIP 89: mtocrf 0x80,r9 - ld r9,\area+EX_R9(r13) - ld r10,\area+EX_R10(r13) - .if \hsrr == EXC_HV_OR_STD + ld r9,IAREA+EX_R9(r13) + ld r10,IAREA+EX_R10(r13) + .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION b kvmppc_skip_Hinterrupt FTR_SECTION_ELSE b kvmppc_skip_interrupt ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr + .elseif IHSRR b kvmppc_skip_Hinterrupt .else b kvmppc_skip_interrupt @@ -363,7 +354,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) #else .macro KVMTEST name, hsrr, n .endm -.macro KVM_HANDLER name, vec, hsrr, area, skip +.macro GEN_KVM name .endm #endif @@ -1627,6 +1618,12 @@ EXC_VIRT_NONE(0x4b00, 0x100) * without saving, though xer is not a good idea to use, as hardware may * interpret some bits so it may be costly to change them. 
*/ +INT_DEFINE_BEGIN(system_call) + IVEC=0xc00 + IKVM_REAL=1 + IKVM_VIRT=1 +INT_DEFINE_END(system_call) + .macro SYSTEM_CALL virt #ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* @@ -1720,7 +1717,7 @@ TRAMP_KVM_BEGIN(system_call_kvm) SET_SCRATCH0(r10) std r9,PACA_EXGEN+EX_R9(r13) mfcr r9 - KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0 + GEN_KVM system_call #endif -- cgit v1.2.3-59-g8ed1b From a3cd35be6e535f303539aaf258269d48e6bd60cb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:18 +1000 Subject: powerpc/64s/exception: Add ISIDE option Rather than using DAR=2 to select the i-side registers, add an explicit option. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-10-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cba99f9a815b..4eb099046f9d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -199,6 +199,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define IVEC .L_IVEC_\name\() #define IHSRR .L_IHSRR_\name\() #define IAREA .L_IAREA_\name\() +#define IISIDE .L_IISIDE_\name\() #define IDAR .L_IDAR_\name\() #define IDSISR .L_IDSISR_\name\() #define ISET_RI .L_ISET_RI_\name\() @@ -231,6 +232,9 @@ do_define_int n .ifndef IAREA IAREA=PACA_EXGEN .endif + .ifndef IISIDE + IISIDE=0 + .endif .ifndef IDAR IDAR=0 .endif @@ -542,7 +546,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) */ GET_SCRATCH0(r10) std r10,IAREA+EX_R13(r13) - .if IDAR == 1 + .if IDAR && !IISIDE .if IHSRR mfspr r10,SPRN_HDAR .else @@ -550,7 +554,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endif std r10,IAREA+EX_DAR(r13) .endif - .if IDSISR == 1 + .if IDSISR && !IISIDE .if IHSRR mfspr r10,SPRN_HDSISR .else @@ -625,16 +629,18 @@ 
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) + .if IDAR - .if IDAR == 2 + .if IISIDE ld r10,_NIP(r1) .else ld r10,IAREA+EX_DAR(r13) .endif std r10,_DAR(r1) .endif + .if IDSISR - .if IDSISR == 2 + .if IISIDE ld r10,_MSR(r1) lis r11,DSISR_SRR1_MATCH_64S@h and r10,r10,r11 @@ -643,6 +649,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endif std r10,_DSISR(r1) .endif + BEGIN_FTR_SECTION_NESTED(66) ld r10,IAREA+EX_CFAR(r13) std r10,ORIG_GPR3(r1) @@ -1311,8 +1318,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) INT_DEFINE_BEGIN(instruction_access) IVEC=0x400 - IDAR=2 - IDSISR=2 + IISIDE=1 + IDAR=1 + IDSISR=1 IKVM_REAL=1 INT_DEFINE_END(instruction_access) @@ -1341,7 +1349,8 @@ INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 IAREA=PACA_EXSLB IRECONCILE=0 - IDAR=2 + IISIDE=1 + IDAR=1 IKVM_REAL=1 INT_DEFINE_END(instruction_access_slb) -- cgit v1.2.3-59-g8ed1b From 8729c26e675c356de4179d587af6cd1f16147a39 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:19 +1000 Subject: powerpc/64s/exception: Move real to virt switch into the common handler The real mode interrupt entry points currently use rfid to branch to the common handler in virtual mode. This is a significant amount of code, and forces other code (notably the KVM test) to live in the real mode handler. In the interest of minimising the amount of code that runs unrelocated move the switch to virt mode into the common code, and do it with mtmsrd, which avoids clobbering SRRs (although the post-KVMTEST performance of real-mode interrupt handlers is not a big concern these days). This requires CTR to always be saved (real-mode needs to reach 0xc...) but that's not a huge impact these days. It could be optimized away in future. 
mpe: Incorporate fix from Nick: It's possible for interrupts to be replayed when TM is enabled and suspended, for example rt_sigreturn, where the mtmsrd MSR_KERNEL in the real-mode entry point to the common handler causes a TM Bad Thing exception (due to attempting to clear suspended). The fix for this is to have replay interrupts go to the _virt entry point and skip the mtmsrd, which matches what happens before this patch. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-11-npiggin@gmail.com --- arch/powerpc/include/asm/exception-64s.h | 4 - arch/powerpc/kernel/exceptions-64s.S | 265 ++++++++++++++----------------- 2 files changed, 116 insertions(+), 153 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 33f4f72eb035..47bd4ea0837d 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -33,11 +33,7 @@ #include /* PACA save area size in u64 units (exgen, exmc, etc) */ -#if defined(CONFIG_RELOCATABLE) #define EX_SIZE 10 -#else -#define EX_SIZE 9 -#endif /* * maximum recursive depth of MCE exceptions diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4eb099046f9d..42fced32c8af 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -32,16 +32,10 @@ #define EX_CCR 52 #define EX_CFAR 56 #define EX_PPR 64 -#if defined(CONFIG_RELOCATABLE) #define EX_CTR 72 .if EX_SIZE != 10 .error "EX_SIZE is wrong" .endif -#else -.if EX_SIZE != 9 - .error "EX_SIZE is wrong" -.endif -#endif /* * Following are fixed section helper macros. @@ -124,22 +118,6 @@ name: #define EXC_HV 1 #define EXC_STD 0 -#if defined(CONFIG_RELOCATABLE) -/* - * If we support interrupts with relocation on AND we're a relocatable kernel, - * we need to use CTR to get to the 2nd level handler. So, save/restore it - * when required. 
- */ -#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13) -#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13) -#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg -#else -/* ...else CTR is unused and in register. */ -#define SAVE_CTR(reg, area) -#define GET_CTR(reg, area) mfctr reg -#define RESTORE_CTR(reg, area) -#endif - /* * PPR save/restore macros used in exceptions-64s.S * Used for P7 or later processors @@ -199,6 +177,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define IVEC .L_IVEC_\name\() #define IHSRR .L_IHSRR_\name\() #define IAREA .L_IAREA_\name\() +#define IVIRT .L_IVIRT_\name\() #define IISIDE .L_IISIDE_\name\() #define IDAR .L_IDAR_\name\() #define IDSISR .L_IDSISR_\name\() @@ -232,6 +211,9 @@ do_define_int n .ifndef IAREA IAREA=PACA_EXGEN .endif + .ifndef IVIRT + IVIRT=1 + .endif .ifndef IISIDE IISIDE=0 .endif @@ -325,7 +307,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * outside the head section. CONFIG_RELOCATABLE KVM expects CTR * to be saved in HSTATE_SCRATCH1. */ - mfctr r9 + ld r9,IAREA+EX_CTR(r13) std r9,HSTATE_SCRATCH1(r13) __LOAD_FAR_HANDLER(r9, kvmppc_interrupt) mtctr r9 @@ -362,101 +344,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endm #endif -.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - .if ! 
\set_ri - xori r10,r10,MSR_RI /* Clear MSR_RI */ - .endif - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 - FTR_SECTION_ELSE - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 - .endif - LOAD_HANDLER(r10, \label\()) - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mtspr SPRN_HSRR0,r10 - HRFI_TO_KERNEL - FTR_SECTION_ELSE - mtspr SPRN_SRR0,r10 - RFI_TO_KERNEL - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mtspr SPRN_HSRR0,r10 - HRFI_TO_KERNEL - .else - mtspr SPRN_SRR0,r10 - RFI_TO_KERNEL - .endif - b . /* prevent speculative execution */ -.endm - -/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */ -.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr -#ifdef CONFIG_RELOCATABLE - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - FTR_SECTION_ELSE - mfspr r11,SPRN_SRR0 /* save SRR0 */ - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - .endif - LOAD_HANDLER(r12, \label\()) - mtctr r12 - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - FTR_SECTION_ELSE - mfspr r12,SPRN_SRR1 /* and HSRR1 */ - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - .else - mfspr r12,SPRN_SRR1 /* and HSRR1 */ - .endif - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ - bctr -#else - .if \hsrr == EXC_HV_OR_STD - BEGIN_FTR_SECTION - mfspr r11,SPRN_HSRR0 /* 
save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - FTR_SECTION_ELSE - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - .endif - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ - b \label -#endif -.endm - /* * This is the BOOK3S interrupt entry code macro. * @@ -477,6 +364,23 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) * - Fall through and continue executing in real, unrelocated mode. * This is done if early=2. */ + +.macro GEN_BRANCH_TO_COMMON name, virt + .if \virt +#ifndef CONFIG_RELOCATABLE + b \name\()_common_virt +#else + LOAD_HANDLER(r10, \name\()_common_virt) + mtctr r10 + bctr +#endif + .else + LOAD_HANDLER(r10, \name\()_common_real) + mtctr r10 + bctr + .endif +.endm + .macro GEN_INT_ENTRY name, virt, ool=0 SET_SCRATCH0(r13) /* save r13 */ GET_PACA(r13) @@ -500,8 +404,10 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) OPT_SAVE_REG_TO_PACA(IAREA+EX_PPR, r9, CPU_FTR_HAS_PPR) OPT_SAVE_REG_TO_PACA(IAREA+EX_CFAR, r10, CPU_FTR_CFAR) INTERRUPT_TO_KERNEL - SAVE_CTR(r10, IAREA) + mfctr r10 + std r10,IAREA+EX_CTR(r13) mfcr r9 + .if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT) KVMTEST \name IHSRR IVEC .endif @@ -566,27 +472,58 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .if IEARLY == 2 /* nothing more */ .elseif IEARLY - mfctr r10 /* save ctr, even for !RELOCATABLE */ BRANCH_TO_C000(r11, \name\()_common) - .elseif !\virt - INT_SAVE_SRR_AND_JUMP \name\()_common, IHSRR, ISET_RI .else - INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, IHSRR + .if IHSRR == EXC_HV_OR_STD + BEGIN_FTR_SECTION + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + FTR_SECTION_ELSE + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 
*/ + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .elseif IHSRR + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ .endif + GEN_BRANCH_TO_COMMON \name \virt + .endif + .if \ool .popsection .endif .endm /* - * On entry r13 points to the paca, r9-r13 are saved in the paca, - * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and - * SRR1, and relocation is on. - * - * If stack=0, then the stack is already set in r1, and r1 is saved in r10. - * PPR save and CPU accounting is not done for the !stack case (XXX why not?) + * __GEN_COMMON_ENTRY is required to receive the branch from interrupt + * entry, except in the case of the IEARLY handlers. + * This switches to virtual mode and sets MSR[RI]. */ -.macro GEN_COMMON name +.macro __GEN_COMMON_ENTRY name +DEFINE_FIXED_SYMBOL(\name\()_common_real) +\name\()_common_real: + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + /* MSR[RI] is clear iff using SRR regs */ + .if IHSRR == EXC_HV_OR_STD + BEGIN_FTR_SECTION + xori r10,r10,MSR_RI + END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) + .elseif ! IHSRR + xori r10,r10,MSR_RI + .endif + mtmsrd r10 + + .if IVIRT + .balign IFETCH_ALIGN_BYTES +DEFINE_FIXED_SYMBOL(\name\()_common_virt) +\name\()_common_virt: + .endif /* IVIRT */ +.endm + +.macro __GEN_COMMON_BODY name .if ISTACK andi. 
r10,r12,MSR_PR /* See if coming from user */ mr r10,r1 /* Save r1 */ @@ -604,6 +541,11 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + .if ISET_RI + li r10,MSR_RI + mtmsrd r10,1 /* Set MSR_RI */ + .endif + .if ISTACK .if IKUAP kuap_save_amr_and_lock r9, r10, cr1, cr0 @@ -654,7 +596,7 @@ BEGIN_FTR_SECTION_NESTED(66) ld r10,IAREA+EX_CFAR(r13) std r10,ORIG_GPR3(r1) END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) - GET_CTR(r10, IAREA) + ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ @@ -682,6 +624,19 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) .endif .endm +/* + * On entry r13 points to the paca, r9-r13 are saved in the paca, + * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and + * SRR1, and relocation is on. + * + * If stack=0, then the stack is already set in r1, and r1 is saved in r10. + * PPR save and CPU accounting is not done for the !stack case (XXX why not?) + */ +.macro GEN_COMMON name + __GEN_COMMON_ENTRY \name + __GEN_COMMON_BODY \name +.endm + /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. @@ -834,6 +789,7 @@ EXC_VIRT_NONE(0x4000, 0x100) INT_DEFINE_BEGIN(system_reset) IVEC=0x100 IAREA=PACA_EXNMI + IVIRT=0 /* no virt entry point */ /* * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is * being used, so a nested NMI exception would corrupt it. @@ -913,6 +869,7 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi) #endif /* CONFIG_PPC_PSERIES */ EXC_COMMON_BEGIN(system_reset_common) + __GEN_COMMON_ENTRY system_reset /* * Increment paca->in_nmi then enable MSR_RI. 
SLB or MCE will be able * to recover, but nested NMI will notice in_nmi and not recover @@ -928,7 +885,7 @@ EXC_COMMON_BEGIN(system_reset_common) mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE - GEN_COMMON system_reset + __GEN_COMMON_BODY system_reset bl save_nvgprs /* * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does @@ -973,6 +930,7 @@ EXC_COMMON_BEGIN(system_reset_common) INT_DEFINE_BEGIN(machine_check_early) IVEC=0x200 IAREA=PACA_EXMC + IVIRT=0 /* no virt entry point */ /* * MSR_RI is not enabled, because PACA_EXMC is being used, so a * nested machine check corrupts it. machine_check_common enables @@ -990,6 +948,7 @@ INT_DEFINE_END(machine_check_early) INT_DEFINE_BEGIN(machine_check) IVEC=0x200 IAREA=PACA_EXMC + IVIRT=0 /* no virt entry point */ ISET_RI=0 IDAR=1 IDSISR=1 @@ -1022,7 +981,6 @@ TRAMP_KVM_BEGIN(machine_check_kvm) EXCEPTION_RESTORE_REGS EXC_STD EXC_COMMON_BEGIN(machine_check_early_common) - mtctr r10 /* Restore ctr */ mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 @@ -1061,7 +1019,7 @@ EXC_COMMON_BEGIN(machine_check_early_common) bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - GEN_COMMON machine_check_early + __GEN_COMMON_BODY machine_check_early BEGIN_FTR_SECTION bl enable_machine_check @@ -1448,6 +1406,8 @@ EXC_VIRT_END(program_check, 0x4700, 0x100) TRAMP_KVM_BEGIN(program_check_kvm) GEN_KVM program_check EXC_COMMON_BEGIN(program_check_common) + __GEN_COMMON_ENTRY program_check + /* * It's possible to receive a TM Bad Thing type program check with * userspace register values (in particular r1), but with SRR1 reporting @@ -1473,11 +1433,11 @@ EXC_COMMON_BEGIN(program_check_common) ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ __ISTACK(program_check)=0 - GEN_COMMON program_check + __GEN_COMMON_BODY program_check b 3f 2: __ISTACK(program_check)=1 - GEN_COMMON program_check + __GEN_COMMON_BODY program_check 
3: bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD @@ -1861,14 +1821,13 @@ TRAMP_KVM_BEGIN(hmi_exception_kvm) GEN_KVM hmi_exception EXC_COMMON_BEGIN(hmi_exception_early_common) - mtctr r10 /* Restore ctr */ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - GEN_COMMON hmi_exception_early + __GEN_COMMON_BODY hmi_exception_early addi r3,r1,STACK_FRAME_OVERHEAD bl hmi_exception_realmode @@ -2195,7 +2154,9 @@ EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) bne+ denorm_assist #endif KVMTEST denorm_exception, EXC_HV, 0x1500 - INT_SAVE_SRR_AND_JUMP denorm_exception_common, EXC_HV, 1 + mfspr r11,SPRN_HSRR0 + mfspr r12,SPRN_HSRR1 + GEN_BRANCH_TO_COMMON denorm_exception, virt=0 EXC_REAL_END(denorm_exception, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) @@ -2203,7 +2164,9 @@ EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) mfspr r10,SPRN_HSRR1 andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ bne+ denorm_assist - INT_VIRT_SAVE_SRR_AND_JUMP denorm_exception_common, EXC_HV + mfspr r11,SPRN_HSRR0 + mfspr r12,SPRN_HSRR1 + GEN_BRANCH_TO_COMMON denorm_exception, virt=1 EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else EXC_VIRT_NONE(0x5500, 0x100) @@ -2374,7 +2337,11 @@ EXC_VIRT_NONE(0x5800, 0x100) std r12,PACA_EXGEN+EX_R12(r13); \ GET_SCRATCH0(r10); \ std r10,PACA_EXGEN+EX_R13(r13); \ - INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1 + mfspr r11,SPRN_SRR0; /* save SRR0 */ \ + mfspr r12,SPRN_SRR1; /* and SRR1 */ \ + LOAD_HANDLER(r10, soft_nmi_common); \ + mtctr r10; \ + bctr /* * Branch to soft_nmi_interrupt using the emergency stack. 
The emergency @@ -2390,7 +2357,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE __ISTACK(decrementer)=0 - GEN_COMMON decrementer + __GEN_COMMON_BODY decrementer bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt @@ -2790,12 +2757,12 @@ handle_dabr_fault: h_doorbell_common_msgclr: LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) PPC_MSGCLR(3) - b h_doorbell_common + b h_doorbell_common_virt doorbell_super_common_msgclr: LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) PPC_MSGCLRP(3) - b doorbell_super_common + b doorbell_super_common_virt /* * Called from arch_local_irq_enable when an interrupt needs @@ -2821,20 +2788,20 @@ _GLOBAL(__replay_interrupt) mfcr r9 ori r12,r12,MSR_EE cmpwi r3,0x900 - beq decrementer_common + beq decrementer_common_virt cmpwi r3,0x500 BEGIN_FTR_SECTION - beq h_virt_irq_common + beq h_virt_irq_common_virt FTR_SECTION_ELSE - beq hardware_interrupt_common + beq hardware_interrupt_common_virt ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300) cmpwi r3,0xf00 - beq performance_monitor_common + beq performance_monitor_common_virt BEGIN_FTR_SECTION cmpwi r3,0xa00 beq h_doorbell_common_msgclr cmpwi r3,0xe60 - beq hmi_exception_common + beq hmi_exception_common_virt FTR_SECTION_ELSE cmpwi r3,0xa00 beq doorbell_super_common_msgclr -- cgit v1.2.3-59-g8ed1b From 0eddf327e1dc56f901dd40447d5af3a396f7052b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:20 +1000 Subject: powerpc/64s/exception: Move soft-mask test to common code As well as moving code out of the unrelocated vectors, this allows the masked handlers to be moved to common code, and allows the soft_nmi handler to be generated more like a regular handler. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-12-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 106 ++++++++++++++++------------------- 1 file changed, 49 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 42fced32c8af..a47f2e5922d8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -411,36 +411,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT) KVMTEST \name IHSRR IVEC .endif - .if IMASK - lbz r10,PACAIRQSOFTMASK(r13) - andi. r10,r10,IMASK - /* Associate vector numbers with bits in paca->irq_happened */ - .if IVEC == 0x500 || IVEC == 0xea0 - li r10,PACA_IRQ_EE - .elseif IVEC == 0x900 - li r10,PACA_IRQ_DEC - .elseif IVEC == 0xa00 || IVEC == 0xe80 - li r10,PACA_IRQ_DBELL - .elseif IVEC == 0xe60 - li r10,PACA_IRQ_HMI - .elseif IVEC == 0xf00 - li r10,PACA_IRQ_PMI - .else - .abort "Bad maskable vector" - .endif - - .if IHSRR == EXC_HV_OR_STD - BEGIN_FTR_SECTION - bne masked_Hinterrupt - FTR_SECTION_ELSE - bne masked_interrupt - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - .elseif IHSRR - bne masked_Hinterrupt - .else - bne masked_interrupt - .endif - .endif std r11,IAREA+EX_R11(r13) std r12,IAREA+EX_R12(r13) @@ -524,6 +494,37 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt) .endm .macro __GEN_COMMON_BODY name + .if IMASK + lbz r10,PACAIRQSOFTMASK(r13) + andi. 
r10,r10,IMASK + /* Associate vector numbers with bits in paca->irq_happened */ + .if IVEC == 0x500 || IVEC == 0xea0 + li r10,PACA_IRQ_EE + .elseif IVEC == 0x900 + li r10,PACA_IRQ_DEC + .elseif IVEC == 0xa00 || IVEC == 0xe80 + li r10,PACA_IRQ_DBELL + .elseif IVEC == 0xe60 + li r10,PACA_IRQ_HMI + .elseif IVEC == 0xf00 + li r10,PACA_IRQ_PMI + .else + .abort "Bad maskable vector" + .endif + + .if IHSRR == EXC_HV_OR_STD + BEGIN_FTR_SECTION + bne masked_Hinterrupt + FTR_SECTION_ELSE + bne masked_interrupt + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .elseif IHSRR + bne masked_Hinterrupt + .else + bne masked_interrupt + .endif + .endif + .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ mr r10,r1 /* Save r1 */ @@ -2330,18 +2331,10 @@ EXC_VIRT_NONE(0x5800, 0x100) #ifdef CONFIG_PPC_WATCHDOG -#define MASKED_DEC_HANDLER_LABEL 3f - -#define MASKED_DEC_HANDLER(_H) \ -3: /* soft-nmi */ \ - std r12,PACA_EXGEN+EX_R12(r13); \ - GET_SCRATCH0(r10); \ - std r10,PACA_EXGEN+EX_R13(r13); \ - mfspr r11,SPRN_SRR0; /* save SRR0 */ \ - mfspr r12,SPRN_SRR1; /* and SRR1 */ \ - LOAD_HANDLER(r10, soft_nmi_common); \ - mtctr r10; \ - bctr +INT_DEFINE_BEGIN(soft_nmi) + IVEC=0x900 + ISTACK=0 +INT_DEFINE_END(soft_nmi) /* * Branch to soft_nmi_interrupt using the emergency stack. The emergency @@ -2353,19 +2346,16 @@ EXC_VIRT_NONE(0x5800, 0x100) * and run it entirely with interrupts hard disabled. 
*/ EXC_COMMON_BEGIN(soft_nmi_common) + mfspr r11,SPRN_SRR0 mr r10,r1 ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE - __ISTACK(decrementer)=0 - __GEN_COMMON_BODY decrementer + __GEN_COMMON_BODY soft_nmi bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt b ret_from_except -#else /* CONFIG_PPC_WATCHDOG */ -#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */ -#define MASKED_DEC_HANDLER(_H) #endif /* CONFIG_PPC_WATCHDOG */ /* @@ -2384,7 +2374,6 @@ masked_Hinterrupt: .else masked_interrupt: .endif - std r11,PACA_EXGEN+EX_R11(r13) lbz r11,PACAIRQHAPPENED(r13) or r11,r11,r10 stb r11,PACAIRQHAPPENED(r13) @@ -2393,26 +2382,30 @@ masked_interrupt: lis r10,0x7fff ori r10,r10,0xffff mtspr SPRN_DEC,r10 - b MASKED_DEC_HANDLER_LABEL +#ifdef CONFIG_PPC_WATCHDOG + b soft_nmi_common +#else + b 2f +#endif 1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK beq 2f + xori r12,r12,MSR_EE /* clear MSR_EE */ .if \hsrr - mfspr r10,SPRN_HSRR1 - xori r10,r10,MSR_EE /* clear MSR_EE */ - mtspr SPRN_HSRR1,r10 + mtspr SPRN_HSRR1,r12 .else - mfspr r10,SPRN_SRR1 - xori r10,r10,MSR_EE /* clear MSR_EE */ - mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR1,r12 .endif ori r11,r11,PACA_IRQ_HARD_DIS stb r11,PACAIRQHAPPENED(r13) 2: /* done */ + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 mtcrf 0x80,r9 std r1,PACAR1(r13) ld r9,PACA_EXGEN+EX_R9(r13) ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) /* returns to kernel where r13 must be set up, so don't restore it */ .if \hsrr HRFI_TO_KERNEL @@ -2420,7 +2413,6 @@ masked_interrupt: RFI_TO_KERNEL .endif b . - MASKED_DEC_HANDLER(\hsrr\()) .endm TRAMP_REAL_BEGIN(stf_barrier_fallback) @@ -2527,7 +2519,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) * instruction code patches (which end up in the common .text area) * cannot reach these if they are put there. 
*/ -USE_FIXED_SECTION(virt_trampolines) +USE_TEXT_SECTION() MASKED_INTERRUPT EXC_STD MASKED_INTERRUPT EXC_HV -- cgit v1.2.3-59-g8ed1b From 9600f261acaaabd476d7833cec2dd20f2919f1a0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:21 +1000 Subject: powerpc/64s/exception: Move KVM test to common code This allows more code to be moved out of unrelocated regions. The system call KVMTEST is changed to be open-coded and remain in the tramp area to avoid having to move it to entry_64.S. The custom nature of the system call entry code means the hcall case can be made more streamlined than regular interrupt handlers. mpe: Incorporate fix from Nick: Moving KVM test to the common entry code missed the case of HMI and MCE, which do not do __GEN_COMMON_ENTRY (because they don't want to switch to virt mode). This means an MCE or HMI exception that is taken while KVM is running a guest context will not be switched out of that context, and KVM won't be notified. Found by running sigfuz in guest with patched host on POWER9 DD2.3, which causes some TM-related HMI interrupts (which are expected and supposed to be handled by KVM). This fix adds a __GEN_REALMODE_COMMON_ENTRY for those handlers to add the KVM test. This makes them look a little more like other handlers that all use __GEN_COMMON_ENTRY.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-13-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 260 +++++++++++++++++--------------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 -- arch/powerpc/kvm/book3s_segment.S | 7 - 3 files changed, 139 insertions(+), 139 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a47f2e5922d8..1bc73acceb9a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -44,7 +44,6 @@ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these) * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use) - * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated * EXC_COMMON - After switching to virtual, relocated mode. */ @@ -74,13 +73,6 @@ name: #define TRAMP_VIRT_BEGIN(name) \ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name) -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#define TRAMP_KVM_BEGIN(name) \ - TRAMP_VIRT_BEGIN(name) -#else -#define TRAMP_KVM_BEGIN(name) -#endif - #define EXC_REAL_NONE(start, size) \ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size) @@ -271,6 +263,9 @@ do_define_int n .endm .macro GEN_KVM name + .balign IFETCH_ALIGN_BYTES +\name\()_kvm: + .if IKVM_SKIP cmpwi r10,KVM_GUEST_MODE_SKIP beq 89f @@ -281,13 +276,18 @@ BEGIN_FTR_SECTION_NESTED(947) END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) .endif + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 BEGIN_FTR_SECTION_NESTED(948) ld r10,IAREA+EX_PPR(r13) std r10,HSTATE_PPR(r13) END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) - ld r10,IAREA+EX_R10(r13) + ld r11,IAREA+EX_R11(r13) + ld r12,IAREA+EX_R12(r13) std r12,HSTATE_SCRATCH0(r13) sldi r12,r9,32 + ld 
r9,IAREA+EX_R9(r13) + ld r10,IAREA+EX_R10(r13) /* HSRR variants have the 0x2 bit added to their trap number */ .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION @@ -300,29 +300,16 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .else ori r12,r12,(IVEC) .endif - -#ifdef CONFIG_RELOCATABLE - /* - * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives - * outside the head section. CONFIG_RELOCATABLE KVM expects CTR - * to be saved in HSTATE_SCRATCH1. - */ - ld r9,IAREA+EX_CTR(r13) - std r9,HSTATE_SCRATCH1(r13) - __LOAD_FAR_HANDLER(r9, kvmppc_interrupt) - mtctr r9 - ld r9,IAREA+EX_R9(r13) - bctr -#else - ld r9,IAREA+EX_R9(r13) b kvmppc_interrupt -#endif - .if IKVM_SKIP 89: mtocrf 0x80,r9 + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) + ld r11,IAREA+EX_R11(r13) + ld r12,IAREA+EX_R12(r13) .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION b kvmppc_skip_Hinterrupt @@ -407,11 +394,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) mfctr r10 std r10,IAREA+EX_CTR(r13) mfcr r9 - - .if (!\virt && IKVM_REAL) || (\virt && IKVM_VIRT) - KVMTEST \name IHSRR IVEC - .endif - std r11,IAREA+EX_R11(r13) std r12,IAREA+EX_R12(r13) @@ -469,12 +451,18 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) /* * __GEN_COMMON_ENTRY is required to receive the branch from interrupt - * entry, except in the case of the IEARLY handlers. + * entry, except in the case of the real-mode handlers which require + * __GEN_REALMODE_COMMON_ENTRY. + * * This switches to virtual mode and sets MSR[RI]. 
*/ .macro __GEN_COMMON_ENTRY name DEFINE_FIXED_SYMBOL(\name\()_common_real) \name\()_common_real: + .if IKVM_REAL + KVMTEST \name IHSRR IVEC + .endif + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ /* MSR[RI] is clear iff using SRR regs */ .if IHSRR == EXC_HV_OR_STD @@ -487,12 +475,32 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) mtmsrd r10 .if IVIRT + .if IKVM_VIRT + b 1f /* skip the virt test coming from real */ + .endif + .balign IFETCH_ALIGN_BYTES DEFINE_FIXED_SYMBOL(\name\()_common_virt) \name\()_common_virt: + .if IKVM_VIRT + KVMTEST \name IHSRR IVEC +1: + .endif .endif /* IVIRT */ .endm +/* + * Don't switch to virt mode. Used for early MCE and HMI handlers that + * want to run in real mode. + */ +.macro __GEN_REALMODE_COMMON_ENTRY name +DEFINE_FIXED_SYMBOL(\name\()_common_real) +\name\()_common_real: + .if IKVM_REAL + KVMTEST \name IHSRR IVEC + .endif +.endm + .macro __GEN_COMMON_BODY name .if IMASK lbz r10,PACAIRQSOFTMASK(r13) @@ -848,8 +856,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) */ EXC_REAL_END(system_reset, 0x100, 0x100) EXC_VIRT_NONE(0x4100, 0x100) -TRAMP_KVM_BEGIN(system_reset_kvm) - GEN_KVM system_reset #ifdef CONFIG_PPC_P7_NAP TRAMP_REAL_BEGIN(system_reset_idle_wake) @@ -927,6 +933,8 @@ EXC_COMMON_BEGIN(system_reset_common) EXCEPTION_RESTORE_REGS EXC_STD RFI_TO_USER_OR_KERNEL + GEN_KVM system_reset + INT_DEFINE_BEGIN(machine_check_early) IVEC=0x200 @@ -968,9 +976,6 @@ TRAMP_REAL_BEGIN(machine_check_fwnmi) GEN_INT_ENTRY machine_check_early, virt=0 #endif -TRAMP_KVM_BEGIN(machine_check_kvm) - GEN_KVM machine_check - #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ li r9,0; \ @@ -985,6 +990,8 @@ EXC_COMMON_BEGIN(machine_check_early_common) mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 + __GEN_REALMODE_COMMON_ENTRY machine_check_early + /* * Switch to mc_emergency stack and handle re-entrancy (we limit * the nested MCE upto level 4 to avoid stack overflow). 
@@ -1126,6 +1133,9 @@ EXC_COMMON_BEGIN(machine_check_common) bl machine_check_exception b ret_from_except + GEN_KVM machine_check + + #ifdef CONFIG_PPC_P7_NAP /* * This is an idle wakeup. Low level machine check has already been @@ -1218,8 +1228,6 @@ EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) GEN_INT_ENTRY data_access, virt=1 EXC_VIRT_END(data_access, 0x4300, 0x80) -TRAMP_KVM_BEGIN(data_access_kvm) - GEN_KVM data_access EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DAR(r1) @@ -1232,6 +1240,8 @@ MMU_FTR_SECTION_ELSE b handle_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + GEN_KVM data_access + INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 @@ -1248,8 +1258,6 @@ EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) GEN_INT_ENTRY data_access_slb, virt=1 EXC_VIRT_END(data_access_slb, 0x4380, 0x80) -TRAMP_KVM_BEGIN(data_access_slb_kvm) - GEN_KVM data_access_slb EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb ld r4,_DAR(r1) @@ -1274,6 +1282,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) bl do_bad_slb_fault b ret_from_except + GEN_KVM data_access_slb + INT_DEFINE_BEGIN(instruction_access) IVEC=0x400 @@ -1289,8 +1299,6 @@ EXC_REAL_END(instruction_access, 0x400, 0x80) EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) GEN_INT_ENTRY instruction_access, virt=1 EXC_VIRT_END(instruction_access, 0x4400, 0x80) -TRAMP_KVM_BEGIN(instruction_access_kvm) - GEN_KVM instruction_access EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access ld r4,_DAR(r1) @@ -1303,6 +1311,8 @@ MMU_FTR_SECTION_ELSE b handle_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + GEN_KVM instruction_access + INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 @@ -1319,8 +1329,6 @@ EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) GEN_INT_ENTRY instruction_access_slb, virt=1 
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) -TRAMP_KVM_BEGIN(instruction_access_slb_kvm) - GEN_KVM instruction_access_slb EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb ld r4,_DAR(r1) @@ -1345,6 +1353,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) bl do_bad_slb_fault b ret_from_except + GEN_KVM instruction_access_slb + + INT_DEFINE_BEGIN(hardware_interrupt) IVEC=0x500 IHSRR=EXC_HV_OR_STD @@ -1359,8 +1370,6 @@ EXC_REAL_END(hardware_interrupt, 0x500, 0x100) EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) GEN_INT_ENTRY hardware_interrupt, virt=1 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) -TRAMP_KVM_BEGIN(hardware_interrupt_kvm) - GEN_KVM hardware_interrupt EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt FINISH_NAP @@ -1369,6 +1378,8 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) bl do_IRQ b ret_from_except_lite + GEN_KVM hardware_interrupt + INT_DEFINE_BEGIN(alignment) IVEC=0x600 @@ -1383,8 +1394,6 @@ EXC_REAL_END(alignment, 0x600, 0x100) EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) GEN_INT_ENTRY alignment, virt=1 EXC_VIRT_END(alignment, 0x4600, 0x100) -TRAMP_KVM_BEGIN(alignment_kvm) - GEN_KVM alignment EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment bl save_nvgprs @@ -1392,6 +1401,8 @@ EXC_COMMON_BEGIN(alignment_common) bl alignment_exception b ret_from_except + GEN_KVM alignment + INT_DEFINE_BEGIN(program_check) IVEC=0x700 @@ -1404,8 +1415,6 @@ EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) GEN_INT_ENTRY program_check, virt=1 EXC_VIRT_END(program_check, 0x4700, 0x100) -TRAMP_KVM_BEGIN(program_check_kvm) - GEN_KVM program_check EXC_COMMON_BEGIN(program_check_common) __GEN_COMMON_ENTRY program_check @@ -1445,6 +1454,8 @@ EXC_COMMON_BEGIN(program_check_common) bl program_check_exception b ret_from_except + GEN_KVM program_check + INT_DEFINE_BEGIN(fp_unavailable) IVEC=0x800 @@ -1458,8 +1469,6 @@ EXC_REAL_END(fp_unavailable, 0x800, 0x100) 
EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100) GEN_INT_ENTRY fp_unavailable, virt=1 EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) -TRAMP_KVM_BEGIN(fp_unavailable_kvm) - GEN_KVM fp_unavailable EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ @@ -1490,6 +1499,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b ret_from_except #endif + GEN_KVM fp_unavailable + INT_DEFINE_BEGIN(decrementer) IVEC=0x900 @@ -1503,8 +1514,6 @@ EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) GEN_INT_ENTRY decrementer, virt=1 EXC_VIRT_END(decrementer, 0x4900, 0x80) -TRAMP_KVM_BEGIN(decrementer_kvm) - GEN_KVM decrementer EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer FINISH_NAP @@ -1513,6 +1522,8 @@ EXC_COMMON_BEGIN(decrementer_common) bl timer_interrupt b ret_from_except_lite + GEN_KVM decrementer + INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 @@ -1527,8 +1538,6 @@ EXC_REAL_END(hdecrementer, 0x980, 0x80) EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) GEN_INT_ENTRY hdecrementer, virt=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) -TRAMP_KVM_BEGIN(hdecrementer_kvm) - GEN_KVM hdecrementer EXC_COMMON_BEGIN(hdecrementer_common) GEN_COMMON hdecrementer bl save_nvgprs @@ -1536,6 +1545,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) bl hdec_interrupt b ret_from_except + GEN_KVM hdecrementer + INT_DEFINE_BEGIN(doorbell_super) IVEC=0xa00 @@ -1549,8 +1560,6 @@ EXC_REAL_END(doorbell_super, 0xa00, 0x100) EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) GEN_INT_ENTRY doorbell_super, virt=1 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) -TRAMP_KVM_BEGIN(doorbell_super_kvm) - GEN_KVM doorbell_super EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super FINISH_NAP @@ -1563,6 +1572,8 @@ EXC_COMMON_BEGIN(doorbell_super_common) #endif b ret_from_except_lite + GEN_KVM doorbell_super + EXC_REAL_NONE(0xb00, 0x100) EXC_VIRT_NONE(0x4b00, 0x100) @@ -1667,6 +1678,7 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) EXC_VIRT_END(system_call, 
0x4c00, 0x100) #ifdef CONFIG_KVM_BOOK3S_64_HANDLER +TRAMP_REAL_BEGIN(system_call_kvm) /* * This is a hcall, so register convention is as above, with these * differences: @@ -1674,20 +1686,35 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100) * ctr = orig r13 * orig r10 saved in PACA */ -TRAMP_KVM_BEGIN(system_call_kvm) /* * Save the PPR (on systems that support it) before changing to * HMT_MEDIUM. That allows the KVM code to save that value into the * guest state (it is the guest's PPR value). */ - OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR) +BEGIN_FTR_SECTION_NESTED(948) + mfspr r10,SPRN_PPR + std r10,HSTATE_PPR(r13) +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) HMT_MEDIUM - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR) mfctr r10 SET_SCRATCH0(r10) - std r9,PACA_EXGEN+EX_R9(r13) - mfcr r9 - GEN_KVM system_call + mfcr r10 + std r12,HSTATE_SCRATCH0(r13) + sldi r12,r10,32 + ori r12,r12,0xc00 +#ifdef CONFIG_RELOCATABLE + /* + * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives + * outside the head section. 
+ */ + __LOAD_FAR_HANDLER(r10, kvmppc_interrupt) + mtctr r10 + ld r10,PACA_EXGEN+EX_R10(r13) + bctr +#else + ld r10,PACA_EXGEN+EX_R10(r13) + b kvmppc_interrupt +#endif #endif @@ -1702,8 +1729,6 @@ EXC_REAL_END(single_step, 0xd00, 0x100) EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) GEN_INT_ENTRY single_step, virt=1 EXC_VIRT_END(single_step, 0x4d00, 0x100) -TRAMP_KVM_BEGIN(single_step_kvm) - GEN_KVM single_step EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step bl save_nvgprs @@ -1711,6 +1736,8 @@ EXC_COMMON_BEGIN(single_step_common) bl single_step_exception b ret_from_except + GEN_KVM single_step + INT_DEFINE_BEGIN(h_data_storage) IVEC=0xe00 @@ -1728,8 +1755,6 @@ EXC_REAL_END(h_data_storage, 0xe00, 0x20) EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) GEN_INT_ENTRY h_data_storage, virt=1, ool=1 EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) -TRAMP_KVM_BEGIN(h_data_storage_kvm) - GEN_KVM h_data_storage EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage bl save_nvgprs @@ -1743,6 +1768,8 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b ret_from_except + GEN_KVM h_data_storage + INT_DEFINE_BEGIN(h_instr_storage) IVEC=0xe20 @@ -1757,8 +1784,6 @@ EXC_REAL_END(h_instr_storage, 0xe20, 0x20) EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) GEN_INT_ENTRY h_instr_storage, virt=1, ool=1 EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) -TRAMP_KVM_BEGIN(h_instr_storage_kvm) - GEN_KVM h_instr_storage EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage bl save_nvgprs @@ -1766,6 +1791,8 @@ EXC_COMMON_BEGIN(h_instr_storage_common) bl unknown_exception b ret_from_except + GEN_KVM h_instr_storage + INT_DEFINE_BEGIN(emulation_assist) IVEC=0xe40 @@ -1780,8 +1807,6 @@ EXC_REAL_END(emulation_assist, 0xe40, 0x20) EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) GEN_INT_ENTRY emulation_assist, virt=1, ool=1 EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) -TRAMP_KVM_BEGIN(emulation_assist_kvm) - GEN_KVM emulation_assist 
EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist bl save_nvgprs @@ -1789,6 +1814,8 @@ EXC_COMMON_BEGIN(emulation_assist_common) bl emulation_assist_interrupt b ret_from_except + GEN_KVM emulation_assist + /* * hmi_exception trampoline is a special case. It jumps to hmi_exception_early @@ -1816,14 +1843,13 @@ EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20) GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1 EXC_REAL_END(hmi_exception, 0xe60, 0x20) EXC_VIRT_NONE(0x4e60, 0x20) -TRAMP_KVM_BEGIN(hmi_exception_early_kvm) - GEN_KVM hmi_exception_early -TRAMP_KVM_BEGIN(hmi_exception_kvm) - GEN_KVM hmi_exception EXC_COMMON_BEGIN(hmi_exception_early_common) mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ + + __GEN_REALMODE_COMMON_ENTRY hmi_exception_early + mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ @@ -1846,6 +1872,8 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXCEPTION_RESTORE_REGS EXC_HV GEN_INT_ENTRY hmi_exception, virt=0 + GEN_KVM hmi_exception_early + EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception FINISH_NAP @@ -1855,6 +1883,8 @@ EXC_COMMON_BEGIN(hmi_exception_common) bl handle_hmi_exception b ret_from_except + GEN_KVM hmi_exception + INT_DEFINE_BEGIN(h_doorbell) IVEC=0xe80 @@ -1870,8 +1900,6 @@ EXC_REAL_END(h_doorbell, 0xe80, 0x20) EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) GEN_INT_ENTRY h_doorbell, virt=1, ool=1 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) -TRAMP_KVM_BEGIN(h_doorbell_kvm) - GEN_KVM h_doorbell EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell FINISH_NAP @@ -1884,6 +1912,8 @@ EXC_COMMON_BEGIN(h_doorbell_common) #endif b ret_from_except_lite + GEN_KVM h_doorbell + INT_DEFINE_BEGIN(h_virt_irq) IVEC=0xea0 @@ -1899,8 +1929,6 @@ EXC_REAL_END(h_virt_irq, 0xea0, 0x20) EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) GEN_INT_ENTRY h_virt_irq, virt=1, ool=1 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) 
-TRAMP_KVM_BEGIN(h_virt_irq_kvm) - GEN_KVM h_virt_irq EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq FINISH_NAP @@ -1909,6 +1937,8 @@ EXC_COMMON_BEGIN(h_virt_irq_common) bl do_IRQ b ret_from_except_lite + GEN_KVM h_virt_irq + EXC_REAL_NONE(0xec0, 0x20) EXC_VIRT_NONE(0x4ec0, 0x20) @@ -1928,8 +1958,6 @@ EXC_REAL_END(performance_monitor, 0xf00, 0x20) EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) GEN_INT_ENTRY performance_monitor, virt=1, ool=1 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) -TRAMP_KVM_BEGIN(performance_monitor_kvm) - GEN_KVM performance_monitor EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor FINISH_NAP @@ -1938,6 +1966,8 @@ EXC_COMMON_BEGIN(performance_monitor_common) bl performance_monitor_exception b ret_from_except_lite + GEN_KVM performance_monitor + INT_DEFINE_BEGIN(altivec_unavailable) IVEC=0xf20 @@ -1951,8 +1981,6 @@ EXC_REAL_END(altivec_unavailable, 0xf20, 0x20) EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20) GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1 EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20) -TRAMP_KVM_BEGIN(altivec_unavailable_kvm) - GEN_KVM altivec_unavailable EXC_COMMON_BEGIN(altivec_unavailable_common) GEN_COMMON altivec_unavailable #ifdef CONFIG_ALTIVEC @@ -1986,6 +2014,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl altivec_unavailable_exception b ret_from_except + GEN_KVM altivec_unavailable + INT_DEFINE_BEGIN(vsx_unavailable) IVEC=0xf40 @@ -1999,8 +2029,6 @@ EXC_REAL_END(vsx_unavailable, 0xf40, 0x20) EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20) GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1 EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20) -TRAMP_KVM_BEGIN(vsx_unavailable_kvm) - GEN_KVM vsx_unavailable EXC_COMMON_BEGIN(vsx_unavailable_common) GEN_COMMON vsx_unavailable #ifdef CONFIG_VSX @@ -2033,6 +2061,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b ret_from_except + GEN_KVM vsx_unavailable + INT_DEFINE_BEGIN(facility_unavailable) IVEC=0xf60 @@ -2045,8 +2075,6 @@ 
EXC_REAL_END(facility_unavailable, 0xf60, 0x20) EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) GEN_INT_ENTRY facility_unavailable, virt=1, ool=1 EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) -TRAMP_KVM_BEGIN(facility_unavailable_kvm) - GEN_KVM facility_unavailable EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable bl save_nvgprs @@ -2054,6 +2082,8 @@ EXC_COMMON_BEGIN(facility_unavailable_common) bl facility_unavailable_exception b ret_from_except + GEN_KVM facility_unavailable + INT_DEFINE_BEGIN(h_facility_unavailable) IVEC=0xf80 @@ -2068,8 +2098,6 @@ EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20) EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1 EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) -TRAMP_KVM_BEGIN(h_facility_unavailable_kvm) - GEN_KVM h_facility_unavailable EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable bl save_nvgprs @@ -2077,6 +2105,8 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) bl facility_unavailable_exception b ret_from_except + GEN_KVM h_facility_unavailable + EXC_REAL_NONE(0xfa0, 0x20) EXC_VIRT_NONE(0x4fa0, 0x20) @@ -2102,14 +2132,15 @@ EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) GEN_INT_ENTRY cbe_system_error, virt=0 EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) -TRAMP_KVM_BEGIN(cbe_system_error_kvm) - GEN_KVM cbe_system_error EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_system_error_exception b ret_from_except + + GEN_KVM cbe_system_error + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) @@ -2128,8 +2159,6 @@ EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100) EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) GEN_INT_ENTRY instruction_breakpoint, virt=1 EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) -TRAMP_KVM_BEGIN(instruction_breakpoint_kvm) - GEN_KVM 
instruction_breakpoint EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint bl save_nvgprs @@ -2137,6 +2166,8 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) bl instruction_breakpoint_exception b ret_from_except + GEN_KVM instruction_breakpoint + EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) @@ -2145,6 +2176,7 @@ INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=EXC_HV IEARLY=2 + IKVM_REAL=1 INT_DEFINE_END(denorm_exception) EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) @@ -2154,7 +2186,6 @@ EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ bne+ denorm_assist #endif - KVMTEST denorm_exception, EXC_HV, 0x1500 mfspr r11,SPRN_HSRR0 mfspr r12,SPRN_HSRR1 GEN_BRANCH_TO_COMMON denorm_exception, virt=0 @@ -2172,8 +2203,6 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else EXC_VIRT_NONE(0x5500, 0x100) #endif -TRAMP_KVM_BEGIN(denorm_exception_kvm) - GEN_KVM denorm_exception #ifdef CONFIG_PPC_DENORMALISATION TRAMP_REAL_BEGIN(denorm_assist) @@ -2251,6 +2280,8 @@ EXC_COMMON_BEGIN(denorm_exception_common) bl unknown_exception b ret_from_except + GEN_KVM denorm_exception + #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_maintenance) @@ -2264,14 +2295,15 @@ EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) GEN_INT_ENTRY cbe_maintenance, virt=0 EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) -TRAMP_KVM_BEGIN(cbe_maintenance_kvm) - GEN_KVM cbe_maintenance EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_maintenance_exception b ret_from_except + + GEN_KVM cbe_maintenance + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) @@ -2289,8 +2321,6 @@ EXC_REAL_END(altivec_assist, 0x1700, 0x100) EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) GEN_INT_ENTRY altivec_assist, virt=1 EXC_VIRT_END(altivec_assist, 0x5700, 0x100) -TRAMP_KVM_BEGIN(altivec_assist_kvm) - GEN_KVM 
altivec_assist EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist bl save_nvgprs @@ -2302,6 +2332,8 @@ EXC_COMMON_BEGIN(altivec_assist_common) #endif b ret_from_except + GEN_KVM altivec_assist + #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_thermal) @@ -2315,14 +2347,15 @@ EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) GEN_INT_ENTRY cbe_thermal, virt=0 EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -TRAMP_KVM_BEGIN(cbe_thermal_kvm) - GEN_KVM cbe_thermal EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_thermal_exception b ret_from_except + + GEN_KVM cbe_thermal + #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) @@ -2514,17 +2547,12 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid -/* - * Real mode exceptions actually use this too, but alternate - * instruction code patches (which end up in the common .text area) - * cannot reach these if they are put there. - */ USE_TEXT_SECTION() MASKED_INTERRUPT EXC_STD MASKED_INTERRUPT EXC_HV #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) +kvmppc_skip_interrupt: /* * Here all GPRs are unchanged from when the interrupt happened * except for r13, which is saved in SPRG_SCRATCH0. @@ -2536,7 +2564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) RFI_TO_KERNEL b . -TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) +kvmppc_skip_Hinterrupt: /* * Here all GPRs are unchanged from when the interrupt happened * except for r13, which is saved in SPRG_SCRATCH0. @@ -2549,16 +2577,6 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) b . #endif -/* - * Ensure that any handlers that get invoked from the exception prologs - * above are below the first 64KB (0x10000) of the kernel image because - * the prologs assemble the addresses of these handlers using the - * LOAD_HANDLER macro, which uses an ori instruction. 
- */ - -/*** Common interrupt handlers ***/ - - /* * Relocation-on interrupts: A subset of the interrupts can be delivered * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index dbc2fecc37f0..780a499c7114 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1266,7 +1266,6 @@ kvmppc_interrupt_hv: * R12 = (guest CR << 32) | interrupt vector * R13 = PACA * guest R12 saved in shadow VCPU SCRATCH0 - * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE * guest R13 saved in SPRN_SCRATCH0 */ std r9, HSTATE_SCRATCH2(r13) @@ -1367,12 +1366,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ -#ifdef CONFIG_RELOCATABLE - ld r3, HSTATE_SCRATCH1(r13) - mtctr r3 -#else mfctr r3 -#endif mfxer r4 std r3, VCPU_CTR(r9) std r4, VCPU_XER(r9) @@ -3258,7 +3252,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) * r12 is (CR << 32) | vector * r13 points to our PACA * r12 is saved in HSTATE_SCRATCH0(r13) - * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE * r9 is saved in HSTATE_SCRATCH2(r13) * r13 is saved in HSPRG1 * cfar is saved in HSTATE_CFAR(r13) @@ -3307,11 +3300,7 @@ kvmppc_bad_host_intr: ld r5, HSTATE_CFAR(r13) std r5, ORIG_GPR3(r1) mflr r3 -#ifdef CONFIG_RELOCATABLE - ld r4, HSTATE_SCRATCH1(r13) -#else mfctr r4 -#endif mfxer r5 lbz r6, PACAIRQSOFTMASK(r13) std r3, _LINK(r1) diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 0169bab544dd..1f492aa4c8d6 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -167,16 +167,9 @@ kvmppc_interrupt_pr: * R12 = (guest CR << 32) | exit handler id * R13 = PACA * HSTATE.SCRATCH0 = guest R12 - * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE */ #ifdef CONFIG_PPC64 /* Match 32-bit entry */ -#ifdef CONFIG_RELOCATABLE - std r9, 
HSTATE_SCRATCH2(r13) - ld r9, HSTATE_SCRATCH1(r13) - mtctr r9 - ld r9, HSTATE_SCRATCH2(r13) -#endif rotldi r12, r12, 32 /* Flip R12 halves for stw */ stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ srdi r12, r12, 32 /* shift trap into low half */ -- cgit v1.2.3-59-g8ed1b From d73a10cbf98f868586e907ef9d953f9fc7ae369c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:22 +1000 Subject: powerpc/64s/exception: Remove confusing IEARLY option Replace IEARLY=1 and IEARLY=2 with IBRANCH_TO_COMMON, which controls if the entry code branches to a common handler; and IREALMODE_COMMON, which controls whether the common handler should remain in real mode. These special cases no longer avoid loading the SRR registers; there is no point, as most of them load the registers immediately anyway. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-14-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 49 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1bc73acceb9a..36096043164d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -174,7 +174,8 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define IDAR .L_IDAR_\name\() #define IDSISR .L_IDSISR_\name\() #define ISET_RI .L_ISET_RI_\name\() -#define IEARLY .L_IEARLY_\name\() +#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() +#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() #define IMASK .L_IMASK_\name\() #define IKVM_SKIP .L_IKVM_SKIP_\name\() #define IKVM_REAL .L_IKVM_REAL_\name\() @@ -218,8 +219,15 @@ do_define_int n .ifndef ISET_RI ISET_RI=1 .endif - .ifndef IEARLY - IEARLY=0 + .ifndef IBRANCH_TO_COMMON + IBRANCH_TO_COMMON=1 + .endif + .ifndef IREALMODE_COMMON + IREALMODE_COMMON=0 + .else + .if !
IBRANCH_TO_COMMON + .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0" + .endif .endif .ifndef IMASK IMASK=0 @@ -353,6 +361,11 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) */ .macro GEN_BRANCH_TO_COMMON name, virt + .if IREALMODE_COMMON + LOAD_HANDLER(r10, \name\()_common) + mtctr r10 + bctr + .else .if \virt #ifndef CONFIG_RELOCATABLE b \name\()_common_virt @@ -366,6 +379,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) mtctr r10 bctr .endif + .endif .endm .macro GEN_INT_ENTRY name, virt, ool=0 @@ -421,11 +435,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) stw r10,IAREA+EX_DSISR(r13) .endif - .if IEARLY == 2 - /* nothing more */ - .elseif IEARLY - BRANCH_TO_C000(r11, \name\()_common) - .else .if IHSRR == EXC_HV_OR_STD BEGIN_FTR_SECTION mfspr r11,SPRN_HSRR0 /* save HSRR0 */ @@ -441,6 +450,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) mfspr r11,SPRN_SRR0 /* save SRR0 */ mfspr r12,SPRN_SRR1 /* and SRR1 */ .endif + + .if IBRANCH_TO_COMMON GEN_BRANCH_TO_COMMON \name \virt .endif @@ -940,6 +951,7 @@ INT_DEFINE_BEGIN(machine_check_early) IVEC=0x200 IAREA=PACA_EXMC IVIRT=0 /* no virt entry point */ + IREALMODE_COMMON=1 /* * MSR_RI is not enabled, because PACA_EXMC is being used, so a * nested machine check corrupts it. 
machine_check_common enables @@ -947,7 +959,6 @@ INT_DEFINE_BEGIN(machine_check_early) */ ISET_RI=0 ISTACK=0 - IEARLY=1 IDAR=1 IDSISR=1 IRECONCILE=0 @@ -987,9 +998,6 @@ TRAMP_REAL_BEGIN(machine_check_fwnmi) EXCEPTION_RESTORE_REGS EXC_STD EXC_COMMON_BEGIN(machine_check_early_common) - mfspr r11,SPRN_SRR0 - mfspr r12,SPRN_SRR1 - __GEN_REALMODE_COMMON_ENTRY machine_check_early /* @@ -1825,7 +1833,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) INT_DEFINE_BEGIN(hmi_exception_early) IVEC=0xe60 IHSRR=EXC_HV - IEARLY=1 + IREALMODE_COMMON=1 ISTACK=0 IRECONCILE=0 IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ @@ -1845,9 +1853,6 @@ EXC_REAL_END(hmi_exception, 0xe60, 0x20) EXC_VIRT_NONE(0x4e60, 0x20) EXC_COMMON_BEGIN(hmi_exception_early_common) - mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ - __GEN_REALMODE_COMMON_ENTRY hmi_exception_early mr r10,r1 /* Save r1 */ @@ -2175,29 +2180,23 @@ EXC_VIRT_NONE(0x5400, 0x100) INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=EXC_HV - IEARLY=2 + IBRANCH_TO_COMMON=0 IKVM_REAL=1 INT_DEFINE_END(denorm_exception) EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100) GEN_INT_ENTRY denorm_exception, virt=0 #ifdef CONFIG_PPC_DENORMALISATION - mfspr r10,SPRN_HSRR1 - andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ + andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */ bne+ denorm_assist #endif - mfspr r11,SPRN_HSRR0 - mfspr r12,SPRN_HSRR1 GEN_BRANCH_TO_COMMON denorm_exception, virt=0 EXC_REAL_END(denorm_exception, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) GEN_INT_ENTRY denorm_exception, virt=1 - mfspr r10,SPRN_HSRR1 - andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ + andis. r10,r12,(HSRR1_DENORM)@h /* denorm? 
*/ bne+ denorm_assist - mfspr r11,SPRN_HSRR0 - mfspr r12,SPRN_HSRR1 GEN_BRANCH_TO_COMMON denorm_exception, virt=1 EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else -- cgit v1.2.3-59-g8ed1b From 931dc86b3a965add65b0e04b6a0754083df1ab1f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:23 +1000 Subject: powerpc/64s/exception: Remove the SPR saving patch code macros These are used infrequently enough they don't provide much help, so inline them. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-15-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 82 ++++++++++++------------------------ 1 file changed, 28 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 36096043164d..0ea5ba1d0ca0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -110,46 +110,6 @@ name: #define EXC_HV 1 #define EXC_STD 0 -/* - * PPR save/restore macros used in exceptions-64s.S - * Used for P7 or later processors - */ -#define SAVE_PPR(area, ra) \ -BEGIN_FTR_SECTION_NESTED(940) \ - ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \ - std ra,_PPR(r1); \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) - -#define RESTORE_PPR_PACA(area, ra) \ -BEGIN_FTR_SECTION_NESTED(941) \ - ld ra,area+EX_PPR(r13); \ - mtspr SPRN_PPR,ra; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941) - -/* - * Get an SPR into a register if the CPU has the given feature - */ -#define OPT_GET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mfspr ra,spr; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Set an SPR from a register if the CPU has the given feature - */ -#define OPT_SET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mtspr spr,ra; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Save a register to the PACA if the CPU has the given feature - */ -#define 
OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - std ra,offset(r13); \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - /* * Branch to label using its 0xC000 address. This results in instruction * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned @@ -278,18 +238,18 @@ do_define_int n cmpwi r10,KVM_GUEST_MODE_SKIP beq 89f .else -BEGIN_FTR_SECTION_NESTED(947) +BEGIN_FTR_SECTION ld r10,IAREA+EX_CFAR(r13) std r10,HSTATE_CFAR(r13) -END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endif ld r10,PACA_EXGEN+EX_CTR(r13) mtctr r10 -BEGIN_FTR_SECTION_NESTED(948) +BEGIN_FTR_SECTION ld r10,IAREA+EX_PPR(r13) std r10,HSTATE_PPR(r13) -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r11,IAREA+EX_R11(r13) ld r12,IAREA+EX_R12(r13) std r12,HSTATE_SCRATCH0(r13) @@ -386,10 +346,14 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) SET_SCRATCH0(r13) /* save r13 */ GET_PACA(r13) std r9,IAREA+EX_R9(r13) /* save r9 */ - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR) +BEGIN_FTR_SECTION + mfspr r9,SPRN_PPR +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ - OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) +BEGIN_FTR_SECTION + mfspr r10,SPRN_CFAR +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .if \ool .if !\virt b tramp_real_\name @@ -402,8 +366,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) .endif .endif - OPT_SAVE_REG_TO_PACA(IAREA+EX_PPR, r9, CPU_FTR_HAS_PPR) - OPT_SAVE_REG_TO_PACA(IAREA+EX_CFAR, r10, CPU_FTR_CFAR) +BEGIN_FTR_SECTION + std r9,IAREA+EX_PPR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) +BEGIN_FTR_SECTION + std r10,IAREA+EX_CFAR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) INTERRUPT_TO_KERNEL mfctr r10 std r10,IAREA+EX_CTR(r13) @@ -572,7 +540,10 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .endif beq 101f /* if from kernel mode */ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10) - SAVE_PPR(IAREA, r9) 
+BEGIN_FTR_SECTION + ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */ + std r9,_PPR(r1) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 101: .else .if IKUAP @@ -612,10 +583,10 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) std r10,_DSISR(r1) .endif -BEGIN_FTR_SECTION_NESTED(66) +BEGIN_FTR_SECTION ld r10,IAREA+EX_CFAR(r13) std r10,ORIG_GPR3(r1) -END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ @@ -1699,10 +1670,10 @@ TRAMP_REAL_BEGIN(system_call_kvm) * HMT_MEDIUM. That allows the KVM code to save that value into the * guest state (it is the guest's PPR value). */ -BEGIN_FTR_SECTION_NESTED(948) +BEGIN_FTR_SECTION mfspr r10,SPRN_PPR std r10,HSTATE_PPR(r13) -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM mfctr r10 SET_SCRATCH0(r10) @@ -2259,7 +2230,10 @@ denorm_done: mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) - RESTORE_PPR_PACA(PACA_EXGEN, r10) +BEGIN_FTR_SECTION + ld r10,PACA_EXGEN+EX_PPR(r13) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) BEGIN_FTR_SECTION ld r10,PACA_EXGEN+EX_CFAR(r13) mtspr SPRN_CFAR,r10 -- cgit v1.2.3-59-g8ed1b From 9d598f934470dc455dac01f0dcbebecf1604606f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:24 +1000 Subject: powerpc/64s/exception: Trim unused arguments from KVMTEST macro Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-16-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0ea5ba1d0ca0..da15ec4fc8cb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -224,7 +224,7 @@ do_define_int n #define kvmppc_interrupt 
kvmppc_interrupt_pr #endif -.macro KVMTEST name, hsrr, n +.macro KVMTEST name lbz r10,HSTATE_IN_GUEST(r13) cmpwi r10,0 bne \name\()_kvm @@ -293,7 +293,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .endm #else -.macro KVMTEST name, hsrr, n +.macro KVMTEST name .endm .macro GEN_KVM name .endm @@ -439,7 +439,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) DEFINE_FIXED_SYMBOL(\name\()_common_real) \name\()_common_real: .if IKVM_REAL - KVMTEST \name IHSRR IVEC + KVMTEST \name .endif ld r10,PACAKMSR(r13) /* get MSR value for kernel */ @@ -462,7 +462,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) DEFINE_FIXED_SYMBOL(\name\()_common_virt) \name\()_common_virt: .if IKVM_VIRT - KVMTEST \name IHSRR IVEC + KVMTEST \name 1: .endif .endif /* IVIRT */ @@ -476,7 +476,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt) DEFINE_FIXED_SYMBOL(\name\()_common_real) \name\()_common_real: .if IKVM_REAL - KVMTEST \name IHSRR IVEC + KVMTEST \name .endif .endm @@ -1598,7 +1598,7 @@ INT_DEFINE_END(system_call) GET_PACA(r13) std r10,PACA_EXGEN+EX_R10(r13) INTERRUPT_TO_KERNEL - KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */ + KVMTEST system_call /* uses r10, branch to system_call_kvm */ mfctr r9 #else mr r9,r13 -- cgit v1.2.3-59-g8ed1b From 2babd6ea43edacfc1577432baa187a7d212f3f4f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:25 +1000 Subject: powerpc/64s/exception: Avoid touching the stack in hdecrementer The hdec interrupt handler is reported to sometimes fire in Linux if KVM leaves it pending after a guest exits. This is harmless, so there is a no-op handler for it. The interrupt handler currently uses the regular kernel stack. Change this to avoid touching the stack entirely. This should be the last place where the regular Linux stack can be accessed with asynchronous interrupts (including PMI) soft-masked. It might be possible to take advantage of this invariant, e.g., to context switch the kernel stack SLB entry without clearing MSR[EE].
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-17-npiggin@gmail.com --- arch/powerpc/include/asm/time.h | 1 - arch/powerpc/kernel/exceptions-64s.S | 25 ++++++++++++++++++++----- arch/powerpc/kernel/time.c | 9 --------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 08dbe3e6831c..e0107495c4de 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -24,7 +24,6 @@ extern struct clock_event_device decrementer_clockevent; extern void generic_calibrate_decr(void); -extern void hdec_interrupt(struct pt_regs *regs); /* Some sane defaults: 125 MHz timebase, 1GHz processor */ extern unsigned long ppc_proc_freq; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index da15ec4fc8cb..146afa0922fb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1507,6 +1507,8 @@ EXC_COMMON_BEGIN(decrementer_common) INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 IHSRR=EXC_HV + ISTACK=0 + IRECONCILE=0 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(hdecrementer) @@ -1518,11 +1520,24 @@ EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) GEN_INT_ENTRY hdecrementer, virt=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) EXC_COMMON_BEGIN(hdecrementer_common) - GEN_COMMON hdecrementer - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl hdec_interrupt - b ret_from_except + __GEN_COMMON_ENTRY hdecrementer + /* + * Hypervisor decrementer interrupts not caught by the KVM test + * shouldn't occur but are sometimes left pending on exit from a KVM + * guest. We don't need to do anything to clear them, as they are + * edge-triggered. + * + * Be careful to avoid touching the kernel stack. 
+ */ + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + HRFI_TO_KERNEL GEN_KVM hdecrementer diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1168e8b37e30..bda9cb4a0a5f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -663,15 +663,6 @@ void timer_broadcast_interrupt(void) } #endif -/* - * Hypervisor decrementer interrupts shouldn't occur but are sometimes - * left pending on exit from a KVM guest. We don't need to do anything - * to clear them, as they are edge-triggered. - */ -void hdec_interrupt(struct pt_regs *regs) -{ -} - #ifdef CONFIG_SUSPEND static void generic_suspend_disable_irqs(void) { -- cgit v1.2.3-59-g8ed1b From 689e7322627c7305dbbb86131ac9e6fe90fc8c99 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:26 +1000 Subject: powerpc/64s/exception: Re-inline some handlers The reduction in interrupt entry size allows some handlers to be re-inlined. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-18-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 146afa0922fb..670a9ee37b9a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1202,7 +1202,7 @@ INT_DEFINE_BEGIN(data_access) INT_DEFINE_END(data_access) EXC_REAL_BEGIN(data_access, 0x300, 0x80) - GEN_INT_ENTRY data_access, virt=0, ool=1 + GEN_INT_ENTRY data_access, virt=0 EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) GEN_INT_ENTRY data_access, virt=1 @@ -1232,7 +1232,7 @@ INT_DEFINE_BEGIN(data_access_slb) INT_DEFINE_END(data_access_slb) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) - GEN_INT_ENTRY data_access_slb, virt=0, ool=1 + GEN_INT_ENTRY data_access_slb, virt=0 EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) GEN_INT_ENTRY data_access_slb, virt=1 @@ -1488,7 +1488,7 @@ INT_DEFINE_BEGIN(decrementer) INT_DEFINE_END(decrementer) EXC_REAL_BEGIN(decrementer, 0x900, 0x80) - GEN_INT_ENTRY decrementer, virt=0, ool=1 + GEN_INT_ENTRY decrementer, virt=0 EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) GEN_INT_ENTRY decrementer, virt=1 -- cgit v1.2.3-59-g8ed1b From 3f7fbd97d07d6e724ac34f3fce1031977944bca0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:27 +1000 Subject: powerpc/64s/exception: Clean up SRR specifiers Remove more magic numbers and replace with nicely named bools. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-19-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 68 +++++++++++++++++------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 670a9ee37b9a..70de0fe2b3c3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -105,11 +105,6 @@ name: ori reg,reg,(ABS_ADDR(label))@l; \ addis reg,reg,(ABS_ADDR(label))@h -/* Exception register prefixes */ -#define EXC_HV_OR_STD 2 /* depends on HVMODE */ -#define EXC_HV 1 -#define EXC_STD 0 - /* * Branch to label using its 0xC000 address. This results in instruction * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned @@ -128,6 +123,7 @@ name: */ #define IVEC .L_IVEC_\name\() #define IHSRR .L_IHSRR_\name\() +#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() #define IAREA .L_IAREA_\name\() #define IVIRT .L_IVIRT_\name\() #define IISIDE .L_IISIDE_\name\() @@ -159,7 +155,10 @@ do_define_int n .error "IVEC not defined" .endif .ifndef IHSRR - IHSRR=EXC_STD + IHSRR=0 + .endif + .ifndef IHSRR_IF_HVMODE + IHSRR_IF_HVMODE=0 .endif .ifndef IAREA IAREA=PACA_EXGEN @@ -257,7 +256,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) /* HSRR variants have the 0x2 bit added to their trap number */ - .if IHSRR == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION ori r12,r12,(IVEC + 0x2) FTR_SECTION_ELSE @@ -278,7 +277,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r10,IAREA+EX_R10(r13) ld r11,IAREA+EX_R11(r13) ld r12,IAREA+EX_R12(r13) - .if IHSRR == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION b kvmppc_skip_Hinterrupt FTR_SECTION_ELSE @@ -403,7 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) stw r10,IAREA+EX_DSISR(r13) .endif - .if IHSRR == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION mfspr 
r11,SPRN_HSRR0 /* save HSRR0 */ mfspr r12,SPRN_HSRR1 /* and HSRR1 */ @@ -499,7 +498,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .abort "Bad maskable vector" .endif - .if IHSRR == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION bne masked_Hinterrupt FTR_SECTION_ELSE @@ -632,12 +631,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. */ -.macro EXCEPTION_RESTORE_REGS hsrr +.macro EXCEPTION_RESTORE_REGS hsrr=0 /* Move original SRR0 and SRR1 into the respective regs */ ld r9,_MSR(r1) - .if \hsrr == EXC_HV_OR_STD - .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS" - .endif .if \hsrr mtspr SPRN_HSRR1,r9 .else @@ -912,7 +908,7 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - EXCEPTION_RESTORE_REGS EXC_STD + EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL GEN_KVM system_reset @@ -966,7 +962,7 @@ TRAMP_REAL_BEGIN(machine_check_fwnmi) lhz r12,PACA_IN_MCE(r13); \ subi r12,r12,1; \ sth r12,PACA_IN_MCE(r13); \ - EXCEPTION_RESTORE_REGS EXC_STD + EXCEPTION_RESTORE_REGS EXC_COMMON_BEGIN(machine_check_early_common) __GEN_REALMODE_COMMON_ENTRY machine_check_early @@ -1337,7 +1333,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) INT_DEFINE_BEGIN(hardware_interrupt) IVEC=0x500 - IHSRR=EXC_HV_OR_STD + IHSRR_IF_HVMODE=1 IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 @@ -1506,7 +1502,7 @@ EXC_COMMON_BEGIN(decrementer_common) INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 - IHSRR=EXC_HV + IHSRR=1 ISTACK=0 IRECONCILE=0 IKVM_REAL=1 @@ -1735,7 +1731,7 @@ EXC_COMMON_BEGIN(single_step_common) INT_DEFINE_BEGIN(h_data_storage) IVEC=0xe00 - IHSRR=EXC_HV + IHSRR=1 IDAR=1 IDSISR=1 IKVM_SKIP=1 @@ -1767,7 +1763,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) INT_DEFINE_BEGIN(h_instr_storage) IVEC=0xe20 - IHSRR=EXC_HV + IHSRR=1 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(h_instr_storage) @@ -1790,7 +1786,7 @@ EXC_COMMON_BEGIN(h_instr_storage_common) 
INT_DEFINE_BEGIN(emulation_assist) IVEC=0xe40 - IHSRR=EXC_HV + IHSRR=1 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(emulation_assist) @@ -1818,7 +1814,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) */ INT_DEFINE_BEGIN(hmi_exception_early) IVEC=0xe60 - IHSRR=EXC_HV + IHSRR=1 IREALMODE_COMMON=1 ISTACK=0 IRECONCILE=0 @@ -1828,7 +1824,7 @@ INT_DEFINE_END(hmi_exception_early) INT_DEFINE_BEGIN(hmi_exception) IVEC=0xe60 - IHSRR=EXC_HV + IHSRR=1 IMASK=IRQS_DISABLED IKVM_REAL=1 INT_DEFINE_END(hmi_exception) @@ -1852,7 +1848,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) cmpdi cr0,r3,0 bne 1f - EXCEPTION_RESTORE_REGS EXC_HV + EXCEPTION_RESTORE_REGS hsrr=1 HRFI_TO_USER_OR_KERNEL 1: @@ -1860,7 +1856,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) * Go to virtual mode and pull the HMI event information from * firmware. */ - EXCEPTION_RESTORE_REGS EXC_HV + EXCEPTION_RESTORE_REGS hsrr=1 GEN_INT_ENTRY hmi_exception, virt=0 GEN_KVM hmi_exception_early @@ -1879,7 +1875,7 @@ EXC_COMMON_BEGIN(hmi_exception_common) INT_DEFINE_BEGIN(h_doorbell) IVEC=0xe80 - IHSRR=EXC_HV + IHSRR=1 IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 @@ -1908,7 +1904,7 @@ EXC_COMMON_BEGIN(h_doorbell_common) INT_DEFINE_BEGIN(h_virt_irq) IVEC=0xea0 - IHSRR=EXC_HV + IHSRR=1 IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 @@ -2078,7 +2074,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common) INT_DEFINE_BEGIN(h_facility_unavailable) IVEC=0xf80 - IHSRR=EXC_HV + IHSRR=1 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(h_facility_unavailable) @@ -2114,7 +2110,7 @@ EXC_VIRT_NONE(0x5100, 0x100) #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_system_error) IVEC=0x1200 - IHSRR=EXC_HV + IHSRR=1 IKVM_SKIP=1 IKVM_REAL=1 INT_DEFINE_END(cbe_system_error) @@ -2165,8 +2161,8 @@ EXC_VIRT_NONE(0x5400, 0x100) INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 - IHSRR=EXC_HV - IBRANCH_TO_COMMON=0 + IHSRR=1 + IBRANCH_COMMON=0 IKVM_REAL=1 INT_DEFINE_END(denorm_exception) @@ -2274,7 +2270,7 @@ EXC_COMMON_BEGIN(denorm_exception_common) #ifdef CONFIG_CBE_RAS 
INT_DEFINE_BEGIN(cbe_maintenance) IVEC=0x1600 - IHSRR=EXC_HV + IHSRR=1 IKVM_SKIP=1 IKVM_REAL=1 INT_DEFINE_END(cbe_maintenance) @@ -2326,7 +2322,7 @@ EXC_COMMON_BEGIN(altivec_assist_common) #ifdef CONFIG_CBE_RAS INT_DEFINE_BEGIN(cbe_thermal) IVEC=0x1800 - IHSRR=EXC_HV + IHSRR=1 IKVM_SKIP=1 IKVM_REAL=1 INT_DEFINE_END(cbe_thermal) @@ -2389,7 +2385,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ -.macro MASKED_INTERRUPT hsrr +.macro MASKED_INTERRUPT hsrr=0 .if \hsrr masked_Hinterrupt: .else @@ -2536,8 +2532,8 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) hrfid USE_TEXT_SECTION() - MASKED_INTERRUPT EXC_STD - MASKED_INTERRUPT EXC_HV + MASKED_INTERRUPT + MASKED_INTERRUPT hsrr=1 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER kvmppc_skip_interrupt: -- cgit v1.2.3-59-g8ed1b From 94325357e8c064e28930a9f571395ca9782a6e6d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:28 +1000 Subject: powerpc/64s/exception: Add more comments for interrupt handlers A few of the non-standard handlers are left uncommented. Some more description could be added to some. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-20-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 391 +++++++++++++++++++++++++++++++---- 1 file changed, 353 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 70de0fe2b3c3..6bd157108991 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -121,26 +121,26 @@ name: /* * Interrupt code generation macros */ -#define IVEC .L_IVEC_\name\() -#define IHSRR .L_IHSRR_\name\() -#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() -#define IAREA .L_IAREA_\name\() -#define IVIRT .L_IVIRT_\name\() -#define IISIDE .L_IISIDE_\name\() -#define IDAR .L_IDAR_\name\() -#define IDSISR .L_IDSISR_\name\() -#define ISET_RI .L_ISET_RI_\name\() -#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() -#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() -#define IMASK .L_IMASK_\name\() -#define IKVM_SKIP .L_IKVM_SKIP_\name\() -#define IKVM_REAL .L_IKVM_REAL_\name\() +#define IVEC .L_IVEC_\name\() /* Interrupt vector address */ +#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */ +#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */ +#define IAREA .L_IAREA_\name\() /* PACA save area */ +#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */ +#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */ +#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */ +#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */ +#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */ +#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */ +#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */ +#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */ +#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */ +#define 
IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */ #define __IKVM_REAL(name) .L_IKVM_REAL_ ## name -#define IKVM_VIRT .L_IKVM_VIRT_\name\() -#define ISTACK .L_ISTACK_\name\() +#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */ +#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name -#define IRECONCILE .L_IRECONCILE_\name\() -#define IKUAP .L_IKUAP_\name\() +#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */ +#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ #define INT_DEFINE_BEGIN(n) \ .macro int_define_ ## n name @@ -773,6 +773,39 @@ __start_interrupts: EXC_VIRT_NONE(0x4000, 0x100) +/** + * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI). + * This is a non-maskable, asynchronous interrupt always taken in real-mode. + * It is caused by: + * - Wake from power-saving state, on powernv. + * - An NMI from another CPU, triggered by firmware or hypercall. + * - As crash/debug signal injected from BMC, firmware or hypervisor. + * + * Handling: + * Power-save wakeup is the only performance critical path, so this is + * determined quickly as possible first. In this case volatile registers + * can be discarded and SPRs like CFAR don't need to be read. + * + * If not a powersave wakeup, then it's run as a regular interrupt, however + * it uses its own stack and PACA save area to preserve the regular kernel + * environment for debugging. + * + * This interrupt is not maskable, so triggering it when MSR[RI] is clear, + * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely + * correct to switch to virtual mode to run the regular interrupt handler + * because it might be interrupted when the MMU is in a bad state (e.g., SLB + * is clear). + * + * FWNMI: + * PAPR specifies a "fwnmi" facility which sends the sreset to a different + * entry point with a different register set up. 
Some hypervisors will + * send the sreset to 0x100 in the guest if it is not fwnmi capable. + * + * KVM: + * Unlike most SRR interrupts, this may be taken by the host while executing + * in a guest, so a KVM test is required. KVM will pull the CPU out of guest + * mode and then raise the sreset. + */ INT_DEFINE_BEGIN(system_reset) IVEC=0x100 IAREA=PACA_EXNMI @@ -848,6 +881,7 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) * Vectors for the FWNMI option. Share common code. */ TRAMP_REAL_BEGIN(system_reset_fwnmi) + /* XXX: fwnmi guest could run a nested/PR guest, so why no test? */ __IKVM_REAL(system_reset)=0 GEN_INT_ENTRY system_reset, virt=0 @@ -914,6 +948,44 @@ EXC_COMMON_BEGIN(system_reset_common) GEN_KVM system_reset +/** + * Interrupt 0x200 - Machine Check Interrupt (MCE). + * This is a non-maskable interrupt always taken in real-mode. It can be + * synchronous or asynchronous, caused by hardware or software, and it may be + * taken in a power-saving state. + * + * Handling: + * Similarly to system reset, this uses its own stack and PACA save area, + * the difference is re-entrancy is allowed on the machine check stack. + * + * machine_check_early is run in real mode, and carefully decodes the + * machine check and tries to handle it (e.g., flush the SLB if there was an + * error detected there), determines if it was recoverable and logs the + * event. + * + * Then, depending on the execution context when the interrupt is taken, there + * are 3 main actions: + * - Executing in kernel mode. The event is queued with irq_work, which means + * it is handled when it is next safe to do so (i.e., the kernel has enabled + * interrupts), which could be immediately when the interrupt returns. This + * avoids nasty issues like switching to virtual mode when the MMU is in a + * bad state, or when executing OPAL code. (SRESET is exposed to such issues, + * but it has different priorities). Check to see if the CPU was in power + * save, and return via the wake up code if it was. 
+ * + * - Executing in user mode. machine_check_exception is run like a normal + * interrupt handler, which processes the data generated by the early handler. + * + * - Executing in guest mode. The interrupt is run with its KVM test, and + * branches to KVM to deal with. KVM may queue the event for the host + * to report later. + * + * This interrupt is not maskable, so if it triggers when MSR[RI] is clear, + * or SCRATCH0 is in use, it may cause a crash. + * + * KVM: + * See SRESET. + */ INT_DEFINE_BEGIN(machine_check_early) IVEC=0x200 IAREA=PACA_EXMC @@ -1175,19 +1247,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) /** - * 0x300 - Data Storage Interrupt (DSI) - * This interrupt is generated due to a data access which does not have a valid - * page table entry with permissions to allow the data access to be performed. - * DAWR matches also fault here, as do RC updates, and minor misc errors e.g., - * copy/paste, AMO, certain invalid CI accesses, etc. + * Interrupt 0x300 - Data Storage Interrupt (DSI). + * This is a synchronous interrupt generated due to a data access exception, + * e.g., a load or store which does not have a valid page table entry with + * permissions. DAWR matches also fault here, as do RC updates, and minor misc + * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc. + * + * Handling: + * - Hash MMU + * Go to do_hash_page first to see if the HPT can be filled from an entry in + * the Linux page table. Hash faults can hit in kernel mode in a fairly + * arbitrary state (e.g., interrupts disabled, locks held) when accessing + * "non-bolted" regions, e.g., vmalloc space. However these should always be + * backed by Linux page tables. * - * This interrupt is delivered to the guest (HV bit unchanged). + * If none is found, do a Linux page fault. Linux page faults can happen in + * kernel mode due to user copy operations of course.
* - * Linux HPT responds by first attempting to refill the hash table from the - * Linux page table, then going to a full page fault if the Linux page table - * entry was insufficient. RPT goes straight to full page fault. + * - Radix MMU + * The hardware loads from the Linux page table directly, so a fault goes + * immediately to Linux page fault. * - * PR KVM ...? + * Conditions like DAWR match are handled on the way in to Linux page fault. */ INT_DEFINE_BEGIN(data_access) IVEC=0x300 @@ -1218,6 +1299,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) GEN_KVM data_access +/** + * Interrupt 0x380 - Data Segment Interrupt (DSLB). + * This is a synchronous interrupt in response to an MMU fault missing SLB + * entry for HPT, or an address outside RPT translation range. + * + * Handling: + * - HPT: + * This refills the SLB, or reports an access fault similarly to a bad page + * fault. When coming from user-mode, the SLB handler may access any kernel + * data, though it may itself take a DSLB. When coming from kernel mode, + * recursive faults must be avoided so access is restricted to the kernel + * image text/data, kernel stack, and any data allocated below + * ppc64_bolted_size (first segment). The kernel handler must avoid stomping + * on user-handler data structures. + * + * A dedicated save area EXSLB is used (XXX: but it actually need not be + * these days, we could use EXGEN). + */ INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 IAREA=PACA_EXSLB @@ -1260,6 +1359,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) GEN_KVM data_access_slb +/** + * Interrupt 0x400 - Instruction Storage Interrupt (ISI). + * This is a synchronous interrupt in response to an MMU fault due to an + * instruction fetch. + * + * Handling: + * Similar to DSI, though in response to fetch. The faulting address is found + * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR). 
+ */ INT_DEFINE_BEGIN(instruction_access) IVEC=0x400 IISIDE=1 @@ -1289,6 +1397,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) GEN_KVM instruction_access +/** + * Interrupt 0x480 - Instruction Segment Interrupt (ISLB). + * This is a synchronous interrupt in response to an MMU fault due to an + * instruction fetch. + * + * Handling: + * Similar to DSLB, though in response to fetch. The faulting address is found + * in SRR0 (rather than DAR). + */ INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 IAREA=PACA_EXSLB @@ -1331,6 +1448,29 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) GEN_KVM instruction_access_slb +/** + * Interrupt 0x500 - External Interrupt. + * This is an asynchronous maskable interrupt in response to an "external + * exception" from the interrupt controller or hypervisor (e.g., device + * interrupt). It is maskable in hardware by clearing MSR[EE], and + * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()). + * + * When running in HV mode, Linux sets up the LPCR[LPES] bit such that + * interrupts are delivered with HSRR registers, guests use SRRs, which + * requires IHSRR_IF_HVMODE. + * + * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that + * external interrupts are delivered as Hypervisor Virtualization Interrupts + * rather than External Interrupts. + * + * Handling: + * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead, + * because registers at the time of the interrupt are not so important as it is + * asynchronous. + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, and clear MSR[EE] in the interrupted context. + */ INT_DEFINE_BEGIN(hardware_interrupt) IVEC=0x500 IHSRR_IF_HVMODE=1 @@ -1356,6 +1496,10 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_KVM hardware_interrupt +/** + * Interrupt 0x600 - Alignment Interrupt + * This is a synchronous interrupt in response to data alignment fault.
+ */ INT_DEFINE_BEGIN(alignment) IVEC=0x600 IDAR=1 @@ -1379,6 +1523,15 @@ EXC_COMMON_BEGIN(alignment_common) GEN_KVM alignment +/** + * Interrupt 0x700 - Program Interrupt (program check). + * This is a synchronous interrupt in response to various instruction faults: + * traps, privilege errors, TM errors, floating point exceptions. + * + * Handling: + * This interrupt may use the "emergency stack" in some cases when being taken + * from kernel context, which complicates handling. + */ INT_DEFINE_BEGIN(program_check) IVEC=0x700 IKVM_REAL=1 @@ -1432,6 +1585,15 @@ EXC_COMMON_BEGIN(program_check_common) GEN_KVM program_check +/* + * Interrupt 0x800 - Floating-Point Unavailable Interrupt. + * This is a synchronous interrupt in response to executing an fp instruction + * with MSR[FP]=0. + * + * Handling: + * This will load FP registers and enable the FP bit if coming from userspace, + * otherwise report a bad kernel use of FP. + */ INT_DEFINE_BEGIN(fp_unavailable) IVEC=0x800 IRECONCILE=0 @@ -1477,6 +1639,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) GEN_KVM fp_unavailable +/** + * Interrupt 0x900 - Decrementer Interrupt. + * This is an asynchronous interrupt in response to a decrementer exception + * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing + * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e., + * local_irq_disable()). + * + * Handling: + * This calls into Linux timer handler. NVGPRs are not saved (see 0x500). + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled + * in the interrupted context. + * If PPC_WATCHDOG is configured, the soft masked handler will actually set + * things back up to run soft_nmi_interrupt as a regular interrupt handler + * on the emergency stack. 
+ */ INT_DEFINE_BEGIN(decrementer) IVEC=0x900 IMASK=IRQS_DISABLED @@ -1500,6 +1679,16 @@ EXC_COMMON_BEGIN(decrementer_common) GEN_KVM decrementer +/** + * Interrupt 0x980 - Hypervisor Decrementer Interrupt. + * This is an asynchronous interrupt, similar to 0x900 but for the HDEC + * register. + * + * Handling: + * Linux does not use this outside KVM where it's used to keep a host timer + * while the guest is given control of DEC. It should normally be caught by + * the KVM test and routed there. + */ INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 IHSRR=1 @@ -1538,6 +1727,20 @@ EXC_COMMON_BEGIN(hdecrementer_common) GEN_KVM hdecrementer +/** + * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt. + * This is an asynchronous interrupt in response to a msgsndp doorbell. + * It is maskable in hardware by clearing MSR[EE], and soft-maskable with + * IRQS_DISABLED mask (i.e., local_irq_disable()). + * + * Handling: + * Guests may use this for IPIs between threads in a core if the + * hypervisor supports it. NVGPRS are not saved (see 0x500). + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, leaving MSR[EE] enabled in the interrupted context because the + * doorbells are edge triggered. + */ INT_DEFINE_BEGIN(doorbell_super) IVEC=0xa00 IMASK=IRQS_DISABLED @@ -1568,16 +1771,20 @@ EXC_COMMON_BEGIN(doorbell_super_common) EXC_REAL_NONE(0xb00, 0x100) EXC_VIRT_NONE(0x4b00, 0x100) -/* - * system call / hypercall (0xc00, 0x4c00) - * - * The system call exception is invoked with "sc 0" and does not alter HV bit. - * - * The hypercall is invoked with "sc 1" and sets HV=1. +/** + * Interrupt 0xc00 - System Call Interrupt (syscall, hcall). + * This is a synchronous interrupt invoked with the "sc" instruction. The + * system call is invoked with "sc 0" and does not alter the HV bit, so it + * is directed to the currently running OS. The hypercall is invoked with + * "sc 1" and it sets HV=1, so it elevates to hypervisor. 
* * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to * 0x4c00 virtual mode. * + * Handling: + * If the KVM test fires then it was due to a hypercall and is accordingly + * routed to KVM. Otherwise this executes a normal Linux system call. + * * Call convention: * * syscall and hypercalls register conventions are documented in @@ -1708,6 +1915,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) #endif +/** + * Interrupt 0xd00 - Trace Interrupt. + * This is a synchronous interrupt in response to instruction step or + * breakpoint faults. + */ INT_DEFINE_BEGIN(single_step) IVEC=0xd00 IKVM_REAL=1 @@ -1729,6 +1941,18 @@ EXC_COMMON_BEGIN(single_step_common) GEN_KVM single_step +/** + * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI). + * This is a synchronous interrupt in response to an MMU fault caused by a + * guest data access. + * + * Handling: + * This should always get routed to KVM. In radix MMU mode, this is caused + * by a guest nested radix access that can't be performed due to the + * partition scope page table. In hash mode, this can be caused by guests + * running with translation disabled (virtual real mode) or with VPM enabled. + * KVM will update the page table structures or disallow the access. + */ INT_DEFINE_BEGIN(h_data_storage) IVEC=0xe00 IHSRR=1 @@ -1761,6 +1985,11 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) GEN_KVM h_data_storage +/** + * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI). + * This is a synchronous interrupt in response to an MMU fault caused by a + * guest instruction fetch, similar to HDSI. + */ INT_DEFINE_BEGIN(h_instr_storage) IVEC=0xe20 IHSRR=1 @@ -1784,6 +2013,9 @@ EXC_COMMON_BEGIN(h_instr_storage_common) GEN_KVM h_instr_storage +/** + * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt. 
+ */ INT_DEFINE_BEGIN(emulation_assist) IVEC=0xe40 IHSRR=1 @@ -1807,10 +2039,29 @@ EXC_COMMON_BEGIN(emulation_assist_common) GEN_KVM emulation_assist -/* - * hmi_exception trampoline is a special case. It jumps to hmi_exception_early - * first, and then eventaully from there to the trampoline to get into virtual - * mode. +/** + * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI). + * This is an asynchronous interrupt caused by a Hypervisor Maintenance + * Exception. It is always taken in real mode but uses HSRR registers + * unlike SRESET and MCE. + * + * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable + * with IRQS_DISABLED mask (i.e., local_irq_disable()). + * + * Handling: + * This is a special case, this is handled similarly to machine checks, with an + * initial real mode handler that is not soft-masked, which attempts to fix the + * problem. Then a regular handler which is soft-maskable and reports the + * problem. + * + * The emergency stack is used for the early real mode handler. + * + * XXX: unclear why MCE and HMI schemes could not be made common, e.g., + * either use soft-masking for the MCE, or use irq_work for the HMI. + * + * KVM: + * Unlike MCE, this calls into KVM without calling the real mode handler + * first. */ INT_DEFINE_BEGIN(hmi_exception_early) IVEC=0xe60 @@ -1873,6 +2124,11 @@ EXC_COMMON_BEGIN(hmi_exception_common) GEN_KVM hmi_exception +/** + * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt. + * This is an asynchronous interrupt in response to a msgsnd doorbell. + * Similar to the 0xa00 doorbell but for host rather than guest. + */ INT_DEFINE_BEGIN(h_doorbell) IVEC=0xe80 IHSRR=1 @@ -1902,6 +2158,11 @@ EXC_COMMON_BEGIN(h_doorbell_common) GEN_KVM h_doorbell +/** + * Interrupt 0xea0 - Hypervisor Virtualization Interrupt. + * This is an asynchronous interrupt in response to an "external exception". + * Similar to 0x500 but for host only. 
+ */ INT_DEFINE_BEGIN(h_virt_irq) IVEC=0xea0 IHSRR=1 @@ -1933,6 +2194,22 @@ EXC_REAL_NONE(0xee0, 0x20) EXC_VIRT_NONE(0x4ee0, 0x20) +/* + * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU). + * This is an asynchronous interrupt in response to a PMU exception. + * It is maskable in hardware by clearing MSR[EE], and soft-maskable with + * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()). + * + * Handling: + * This calls into the perf subsystem. + * + * Like the watchdog soft-nmi, it appears an NMI interrupt to Linux, in that it + * runs under local_irq_disable. However it may be soft-masked in + * powerpc-specific code. + * + * If soft masked, the masked handler will note the pending interrupt for + * replay, and clear MSR[EE] in the interrupted context. + */ INT_DEFINE_BEGIN(performance_monitor) IVEC=0xf00 IMASK=IRQS_PMI_DISABLED @@ -1956,6 +2233,12 @@ EXC_COMMON_BEGIN(performance_monitor_common) GEN_KVM performance_monitor +/** + * Interrupt 0xf20 - Vector Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing a vector (or altivec) instruction with MSR[VEC]=0. + * Similar to FP unavailable. + */ INT_DEFINE_BEGIN(altivec_unavailable) IVEC=0xf20 IRECONCILE=0 @@ -2004,6 +2287,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) GEN_KVM altivec_unavailable +/** + * Interrupt 0xf40 - VSX Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing a VSX instruction with MSR[VSX]=0. + * Similar to FP unavailable. + */ INT_DEFINE_BEGIN(vsx_unavailable) IVEC=0xf40 IRECONCILE=0 @@ -2051,6 +2340,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) GEN_KVM vsx_unavailable +/** + * Interrupt 0xf60 - Facility Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing an instruction without access to the facility that can be + * resolved by the OS (e.g., FSCR, MSR). + * Similar to FP unavailable. 
+ */ INT_DEFINE_BEGIN(facility_unavailable) IVEC=0xf60 IKVM_REAL=1 @@ -2072,6 +2368,13 @@ EXC_COMMON_BEGIN(facility_unavailable_common) GEN_KVM facility_unavailable +/** + * Interrupt 0xf80 - Hypervisor Facility Unavailable Interrupt. + * This is a synchronous interrupt in response to + * executing an instruction without access to the facility that can only + * be resolved in HV mode (e.g., HFSCR). + * Similar to FP unavailable. + */ INT_DEFINE_BEGIN(h_facility_unavailable) IVEC=0xf80 IHSRR=1 @@ -2159,6 +2462,18 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) +/** + * Interrupt 0x1500 - Soft Patch Interrupt + * + * Handling: + * This is an implementation specific interrupt which can be used for a + * range of exceptions. + * + * This interrupt handler is unique in that it runs the denormal assist + * code even for guests (and even in guest context) without going to KVM, + * for speed. POWER9 does not raise denorm exceptions, so this special case + * could be phased out in future to reduce special cases. + */ INT_DEFINE_BEGIN(denorm_exception) IVEC=0x1500 IHSRR=1 -- cgit v1.2.3-59-g8ed1b From 2284ffea8f0c7849a80e76ec698d38506b51a4e8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:29 +1000 Subject: powerpc/64s/exception: Only test KVM in SRR interrupts when PR KVM is supported Apart from SRESET, MCE, and syscall (hcall variant), the SRR type interrupts are not escalated to hypervisor mode, so are delivered to the OS. When running PR KVM, the OS is the hypervisor, and the guest runs with MSR[PR]=1 (ie. usermode), so these interrupts must test if a guest was running when interrupted. These tests are required at the real-mode entry points because the PR KVM host runs with LPCR[AIL]=0. In HV KVM and nested HV KVM, the guest always receives these interrupts, so there is no need for the host to make this test. So remove the tests if PR KVM is not configured.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-21-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 65 ++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6bd157108991..d7a79b1f3c65 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -214,9 +214,36 @@ do_define_int n #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* - * If hv is possible, interrupts come into to the hv version - * of the kvmppc_interrupt code, which then jumps to the PR handler, - * kvmppc_interrupt_pr, if the guest is a PR guest. + * All interrupts which set HSRR registers, as well as SRESET and MCE and + * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken, + * so they all generally need to test whether they were taken in guest context. + * + * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be + * taken with MSR[HV]=0. + * + * Interrupts which set SRR registers (with the above exceptions) do not + * elevate to MSR[HV]=1 mode, though most can be taken when running with + * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do + * not need to test whether a guest is running because they get delivered to + * the guest directly, including nested HV KVM guests. + * + * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host + * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the + * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be + * delivered to the real-mode entry point, therefore such interrupts only test + * KVM in their real mode handlers, and only when PR KVM is possible. 
+ * + * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always + * delivered in real-mode when the MMU is in hash mode because the MMU + * registers are not set appropriately to translate host addresses. In nested + * radix mode these can be delivered in virt-mode as the host translations are + * used implicitly (see: effective LPID, effective PID). + */ + +/* + * If an interrupt is taken while a guest is running, it is immediately routed + * to KVM to handle. If both HV and PR KVM are possible, KVM interrupts go first + * to kvmppc_interrupt_hv, which handles the PR guest case. */ #define kvmppc_interrupt kvmppc_interrupt_hv #else @@ -1274,8 +1301,10 @@ INT_DEFINE_BEGIN(data_access) IVEC=0x300 IDAR=1 IDSISR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_SKIP=1 IKVM_REAL=1 +#endif INT_DEFINE_END(data_access) EXC_REAL_BEGIN(data_access, 0x300, 0x80) @@ -1322,8 +1351,10 @@ INT_DEFINE_BEGIN(data_access_slb) IAREA=PACA_EXSLB IRECONCILE=0 IDAR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_SKIP=1 IKVM_REAL=1 +#endif INT_DEFINE_END(data_access_slb) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) @@ -1373,7 +1404,9 @@ INT_DEFINE_BEGIN(instruction_access) IISIDE=1 IDAR=1 IDSISR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(instruction_access) EXC_REAL_BEGIN(instruction_access, 0x400, 0x80) @@ -1412,7 +1445,9 @@ INT_DEFINE_BEGIN(instruction_access_slb) IRECONCILE=0 IISIDE=1 IDAR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(instruction_access_slb) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) @@ -1504,7 +1539,9 @@ INT_DEFINE_BEGIN(alignment) IVEC=0x600 IDAR=1 IDSISR=1 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(alignment) EXC_REAL_BEGIN(alignment, 0x600, 0x100) @@ -1534,7 +1571,9 @@ EXC_COMMON_BEGIN(alignment_common) */ INT_DEFINE_BEGIN(program_check) IVEC=0x700 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(program_check)
EXC_REAL_BEGIN(program_check, 0x700, 0x100) @@ -1597,7 +1636,9 @@ EXC_COMMON_BEGIN(program_check_common) INT_DEFINE_BEGIN(fp_unavailable) IVEC=0x800 IRECONCILE=0 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(fp_unavailable) EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100) @@ -1659,7 +1700,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) INT_DEFINE_BEGIN(decrementer) IVEC=0x900 IMASK=IRQS_DISABLED +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(decrementer) EXC_REAL_BEGIN(decrementer, 0x900, 0x80) @@ -1744,7 +1787,9 @@ EXC_COMMON_BEGIN(hdecrementer_common) INT_DEFINE_BEGIN(doorbell_super) IVEC=0xa00 IMASK=IRQS_DISABLED +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(doorbell_super) EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) @@ -1922,7 +1967,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) */ INT_DEFINE_BEGIN(single_step) IVEC=0xd00 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(single_step) EXC_REAL_BEGIN(single_step, 0xd00, 0x100) @@ -2213,7 +2260,9 @@ EXC_VIRT_NONE(0x4ee0, 0x20) INT_DEFINE_BEGIN(performance_monitor) IVEC=0xf00 IMASK=IRQS_PMI_DISABLED +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(performance_monitor) EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) @@ -2242,7 +2291,9 @@ EXC_COMMON_BEGIN(performance_monitor_common) INT_DEFINE_BEGIN(altivec_unavailable) IVEC=0xf20 IRECONCILE=0 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(altivec_unavailable) EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) @@ -2296,7 +2347,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) INT_DEFINE_BEGIN(vsx_unavailable) IVEC=0xf40 IRECONCILE=0 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(vsx_unavailable) EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20) @@ -2349,7 +2402,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) */ INT_DEFINE_BEGIN(facility_unavailable) IVEC=0xf60 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif 
INT_DEFINE_END(facility_unavailable) EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20) @@ -2439,8 +2494,10 @@ EXC_VIRT_NONE(0x5200, 0x100) INT_DEFINE_BEGIN(instruction_breakpoint) IVEC=0x1300 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_SKIP=1 IKVM_REAL=1 +#endif INT_DEFINE_END(instruction_breakpoint) EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100) @@ -2611,7 +2668,9 @@ EXC_VIRT_NONE(0x5600, 0x100) INT_DEFINE_BEGIN(altivec_assist) IVEC=0x1700 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 +#endif INT_DEFINE_END(altivec_assist) EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100) -- cgit v1.2.3-59-g8ed1b From b44fc96d7ba96eeb6cf59d0a95d1d35cb7f076d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:30 +1000 Subject: powerpc/64s/exception: Reconcile interrupts in system_reset This adds IRQ_HARD_DIS to irq_happened. Although it doesn't seem to matter much because we're not allowed to enable irqs in an NMI handler, the soft-irq debugging code is becoming more strict about ensuring IRQ_HARD_DIS is in sync with MSR[EE], this may help avoid asserts or other issues. Add a comment explaining why MCE does not have this. Early machine check is generally much smaller and more contained code which will explode if you look at it wrong anyway as it runs in real mode, though there's an argument that we should do similar reconciling for the MCE as well. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-22-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d7a79b1f3c65..3ddabac6ede6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -934,18 +934,19 @@ EXC_COMMON_BEGIN(system_reset_common) __GEN_COMMON_BODY system_reset bl save_nvgprs /* - * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does + * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does * the right thing. We do not want to reconcile because that goes * through irq tracing which we don't want in NMI. * - * Save PACAIRQHAPPENED because some code will do a hard disable - * (e.g., xmon). So we want to restore this back to where it was - * when we return. DAR is unused in the stack, so save it there. + * Save PACAIRQHAPPENED to _DAR (otherwise unused), and set HARD_DIS + * as we are running with MSR[EE]=0. */ li r10,IRQS_ALL_DISABLED stb r10,PACAIRQSOFTMASK(r13) lbz r10,PACAIRQHAPPENED(r13) std r10,_DAR(r1) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) addi r3,r1,STACK_FRAME_OVERHEAD bl system_reset_exception @@ -990,6 +991,11 @@ EXC_COMMON_BEGIN(system_reset_common) * error detected there), determines if it was recoverable and logs the * event. * + * This early code does not "reconcile" irq soft-mask state like SRESET or + * regular interrupts do, so irqs_disabled() among other things may not work + * properly (irq disable/enable already doesn't work because irq tracing can + * not work in real mode). + * * Then, depending on the execution context when the interrupt is taken, there * are 3 main actions: * - Executing in kernel mode. 
The event is queued with irq_work, which means -- cgit v1.2.3-59-g8ed1b From 71c3b05a8083d8774ca66c89672d29d7bf33813e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:31 +1000 Subject: powerpc/64s/exception: Soft NMI interrupt should not use ret_from_except The soft NMI handler does not reconcile interrupt state, so it should not return via the normal ret_from_except path. Return like other NMIs, using the EXCEPTION_RESTORE_REGS macro. This becomes important when the scv interrupt is implemented, which must handle soft-masked interrupts that have r13 set to something other than the PACA -- returning to kernel in this case must restore r13. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-23-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3ddabac6ede6..d75df223da7c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2731,6 +2731,7 @@ EXC_VIRT_NONE(0x5800, 0x100) INT_DEFINE_BEGIN(soft_nmi) IVEC=0x900 ISTACK=0 + IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */ INT_DEFINE_END(soft_nmi) /* @@ -2749,9 +2750,36 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi bl save_nvgprs + + /* + * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see + * system_reset_common) + */ + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + lbz r10,PACAIRQHAPPENED(r13) + std r10,_DAR(r1) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + addi r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt - b ret_from_except + + /* Clear MSR_RI before setting SRR0 and SRR1. */ + li r9,0 + mtmsrd r9,1 + + /* + * Restore soft mask settings. 
+ */ + ld r10,_DAR(r1) + stb r10,PACAIRQHAPPENED(r13) + ld r10,SOFTE(r1) + stb r10,PACAIRQSOFTMASK(r13) + + kuap_restore_amr r10 + EXCEPTION_RESTORE_REGS hsrr=0 + RFI_TO_KERNEL #endif /* CONFIG_PPC_WATCHDOG */ -- cgit v1.2.3-59-g8ed1b From 965dd3ad307671d06471da0e9a44f3a194167d26 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:32 +1000 Subject: powerpc/64/syscall: Remove non-volatile GPR save optimisation powerpc has an optimisation where interrupts avoid saving the non-volatile (or callee saved) registers to the interrupt stack frame if they are not required. Two problems with this are that an interrupt does not always know whether it will need non-volatiles; and if it does need them, they can only be saved from the entry-scoped asm code (because we don't control what the C compiler does with these registers). system calls are the most difficult: some system calls always require all registers (e.g., fork, to copy regs into the child). Sometimes registers are only required under certain conditions (e.g., tracing, signal delivery). These cases require ugly logic in the call chains (e.g., ppc_fork), and require a lot of logic to be implemented in asm. So remove the optimisation for system calls, and always save NVGPRs on entry. Modern high performance CPUs are not so sensitive, because the stores are dense in cache and can be hidden by other expensive work in the syscall path -- the null syscall selftests benchmark on POWER9 is not slowed (124.40ns before and 123.64ns after, i.e., within the noise). Other interrupts retain the NVGPR optimisation for now. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-24-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 72 ++++++-------------------------- arch/powerpc/kernel/syscalls/syscall.tbl | 22 ++++++---- 2 files changed, 28 insertions(+), 66 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6ba675b0cf7d..14afe12eae8c 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -98,13 +98,14 @@ END_BTB_FLUSH_SECTION std r11,_XER(r1) std r11,_CTR(r1) std r9,GPR13(r1) + SAVE_NVGPRS(r1) mflr r10 /* * This clears CR0.SO (bit 28), which is the error indication on * return from this system call. */ rldimi r2,r11,28,(63-28) - li r11,0xc01 + li r11,0xc00 std r10,_LINK(r1) std r11,_TRAP(r1) std r3,ORIG_GPR3(r1) @@ -323,7 +324,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Traced system call support */ .Lsyscall_dotrace: - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_enter @@ -408,7 +408,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_leave b ret_from_except @@ -442,62 +441,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) _ASM_NOKPROBE_SYMBOL(system_call_common); _ASM_NOKPROBE_SYMBOL(system_call_exit); -/* Save non-volatile GPRs, if not already saved. */ -_GLOBAL(save_nvgprs) - ld r11,_TRAP(r1) - andi. r0,r11,1 - beqlr- - SAVE_NVGPRS(r1) - clrrdi r0,r11,1 - std r0,_TRAP(r1) - blr -_ASM_NOKPROBE_SYMBOL(save_nvgprs); - - -/* - * The sigsuspend and rt_sigsuspend system calls can call do_signal - * and thus put the process into the stopped state where we might - * want to examine its user state with ptrace. Therefore we need - * to save all the nonvolatile registers (r14 - r31) before calling - * the C code. Similarly, fork, vfork and clone need the full - * register state on the stack so that it can be copied to the child. 
- */ - -_GLOBAL(ppc_fork) - bl save_nvgprs - bl sys_fork - b .Lsyscall_exit - -_GLOBAL(ppc_vfork) - bl save_nvgprs - bl sys_vfork - b .Lsyscall_exit - -_GLOBAL(ppc_clone) - bl save_nvgprs - bl sys_clone - b .Lsyscall_exit - -_GLOBAL(ppc_clone3) - bl save_nvgprs - bl sys_clone3 - b .Lsyscall_exit - -_GLOBAL(ppc32_swapcontext) - bl save_nvgprs - bl compat_sys_swapcontext - b .Lsyscall_exit - -_GLOBAL(ppc64_swapcontext) - bl save_nvgprs - bl sys_swapcontext - b .Lsyscall_exit - -_GLOBAL(ppc_switch_endian) - bl save_nvgprs - bl sys_switch_endian - b .Lsyscall_exit - _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) @@ -516,6 +459,17 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit +/* Save non-volatile GPRs, if not already saved. */ +_GLOBAL(save_nvgprs) + ld r11,_TRAP(r1) + andi. r0,r11,1 + beqlr- + SAVE_NVGPRS(r1) + clrrdi r0,r11,1 + std r0,_TRAP(r1) + blr +_ASM_NOKPROBE_SYMBOL(save_nvgprs); + #ifdef CONFIG_PPC_BOOK3S_64 #define FLUSH_COUNT_CACHE \ diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 35b61bfc1b1a..220ae11555f2 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -9,7 +9,9 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 nospu fork ppc_fork +2 32 fork ppc_fork sys_fork +2 64 fork sys_fork +2 spu fork sys_ni_syscall 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -158,7 +160,9 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 nospu clone ppc_clone +120 32 clone ppc_clone sys_clone +120 64 clone sys_clone +120 spu clone sys_ni_syscall 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -240,7 +244,9 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 nospu vfork ppc_vfork +189 32 
vfork ppc_vfork sys_vfork +189 64 vfork sys_vfork +189 spu vfork sys_ni_syscall 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -316,8 +322,8 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext ppc32_swapcontext -249 64 swapcontext ppc64_swapcontext +249 32 swapcontext ppc_swapcontext compat_sys_swapcontext +249 64 swapcontext sys_swapcontext 249 spu swapcontext sys_ni_syscall 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 @@ -456,7 +462,7 @@ 361 common bpf sys_bpf 362 nospu execveat sys_execveat compat_sys_execveat 363 32 switch_endian sys_ni_syscall -363 64 switch_endian ppc_switch_endian +363 64 switch_endian sys_switch_endian 363 spu switch_endian sys_ni_syscall 364 common userfaultfd sys_userfaultfd 365 common membarrier sys_membarrier @@ -516,6 +522,8 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 nospu clone3 ppc_clone3 +435 32 clone3 ppc_clone3 sys_clone3 +435 64 clone3 sys_clone3 +435 spu clone3 sys_ni_syscall 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd -- cgit v1.2.3-59-g8ed1b From f14f8a2032af8c38440afa68ccd0586532ec61d5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:33 +1000 Subject: powerpc/64/sstep: Ifdef the deprecated fast endian switch syscall Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-25-npiggin@gmail.com --- arch/powerpc/lib/sstep.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c077acb983a1..5f3a7bd9d90d 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3179,8 +3179,9 @@ int emulate_step(struct pt_regs *regs, 
unsigned int instr) * entry code works. If that is changed, this will * need to be changed also. */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { + if (IS_ENABLED(CONFIG_PPC_FAST_ENDIAN_SWITCH) && + cpu_has_feature(CPU_FTR_REAL_LE) && + regs->gpr[0] == 0x1ebe) { regs->msr ^= MSR_LE; goto instr_done; } -- cgit v1.2.3-59-g8ed1b From 68b34588e2027f699a3c034235f21cd19356b2e6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:34 +1000 Subject: powerpc/64/sycall: Implement syscall entry/exit logic in C System call entry and particularly exit code is beyond the limit of what is reasonable to implement in asm. This conversion moves all conditional branches out of the asm code, except for the case that all GPRs should be restored at exit. Null syscall test is about 5% faster after this patch, because the exit work is handled under local_irq_disable, and the hard mask and pending interrupt replay is handled after that, which avoids games with MSR. mpe: Includes subsequent fixes from Nick: This fixes 4 issues caught by TM selftests. First was a tm-syscall bug that hit due to tabort_syscall being called after interrupts were reconciled (in a subsequent patch), which led to interrupts being enabled before tabort_syscall was called. Rather than going through un-reconciling interrupts for the return, I just go back to putting the test early in asm, the C-ification of that wasn't a big win anyway. Second is the syscall return _TIF_USER_WORK_MASK check would go into an infinite loop if _TIF_RESTORE_TM became set. The asm code uses _TIF_USER_WORK_MASK to branch to the slowpath, which includes restore_tm_state. Third is system call return was not calling restore_tm_state, I missed this completely (although it's in the return from interrupt C conversion, because when the asm syscall code encountered problems it would branch to the interrupt return code).
Fourth is MSR_VEC missing from restore_math, which was caught by tm-unavailable selftest taking an unexpected facility unavailable interrupt when testing VSX unavailable exception with MSR.FP=1 MSR.VEC=1. Fourth case also has a fixup in a subsequent patch. Signed-off-by: Nicholas Piggin Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-26-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 13 +- arch/powerpc/include/asm/book3s/64/kup-radix.h | 14 +- arch/powerpc/include/asm/cputime.h | 33 +++ arch/powerpc/include/asm/hw_irq.h | 4 + arch/powerpc/include/asm/ptrace.h | 3 + arch/powerpc/include/asm/signal.h | 3 + arch/powerpc/include/asm/switch_to.h | 5 + arch/powerpc/include/asm/time.h | 3 + arch/powerpc/kernel/Makefile | 3 +- arch/powerpc/kernel/entry_64.S | 326 ++++--------------------- arch/powerpc/kernel/signal.h | 2 - arch/powerpc/kernel/syscall_64.c | 214 ++++++++++++++++ arch/powerpc/kernel/systbl.S | 9 +- 13 files changed, 326 insertions(+), 306 deletions(-) create mode 100644 arch/powerpc/kernel/syscall_64.c diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 983c0084fb3f..ab59a4904254 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -97,6 +97,8 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, unsigned long __init early_init(unsigned long dt_ptr); void __init machine_init(u64 dt_ptr); #endif +long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs); long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); @@ -104,14 +106,6 @@ long sys_switch_endian(void); notrace unsigned int __check_irq_replay(void); void notrace restore_interrupts(void); -/*
ptrace */ -long do_syscall_trace_enter(struct pt_regs *regs); -void do_syscall_trace_leave(struct pt_regs *regs); - -/* process */ -void restore_math(struct pt_regs *regs); -void restore_tm_state(struct pt_regs *regs); - /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, @@ -122,9 +116,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, void __init early_setup(unsigned long dt_ptr); void early_setup_secondary(void); -/* time */ -void accumulate_stolen_time(void); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 90dd3a3fc8c7..71081d90f999 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H #include +#include #define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) #define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) @@ -56,7 +57,14 @@ #ifdef CONFIG_PPC_KUAP -#include +#include +#include + +static inline void kuap_check_amr(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP)) + WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); +} /* * We support individually allowing read or write, but we don't support nesting @@ -127,6 +135,10 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); } +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_check_amr(void) +{ +} #endif /* CONFIG_PPC_KUAP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 2431b4ada2fa..0fccd5ea1e9a 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -43,9 +43,12 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct) */ #ifdef CONFIG_PPC64 #define get_accounting(tsk) (&get_paca()->accounting) +#define raw_get_accounting(tsk) (&local_paca->accounting) static inline void arch_vtime_task_switch(struct task_struct *tsk) { } + #else #define get_accounting(tsk) (&task_thread_info(tsk)->accounting) +#define raw_get_accounting(tsk) get_accounting(tsk) /* * Called from the context switch with interrupts disabled, to charge all * accumulated times to the current process, and to prepare accounting on @@ -60,6 +63,36 @@ static inline void arch_vtime_task_switch(struct task_struct *prev) } #endif +/* + * account_cpu_user_entry/exit runs "unreconciled", so can't trace, + * can't use use get_paca() + */ +static notrace inline void account_cpu_user_entry(void) +{ + unsigned long tb = mftb(); + struct cpu_accounting_data *acct = raw_get_accounting(current); + + acct->utime += (tb - acct->starttime_user); + acct->starttime = tb; +} + +static notrace inline void account_cpu_user_exit(void) +{ + unsigned long tb = mftb(); + struct cpu_accounting_data *acct = raw_get_accounting(current); + + acct->stime += (tb - acct->starttime); + acct->starttime_user = tb; +} + + #endif /* __KERNEL__ */ +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +static inline void account_cpu_user_entry(void) +{ +} +static inline void account_cpu_user_exit(void) +{ +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e3a905e3d573..310583e62bd9 100644 --- 
a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -228,9 +228,13 @@ static inline bool arch_irqs_disabled(void) #ifdef CONFIG_PPC_BOOK3E #define __hard_irq_enable() wrtee(MSR_EE) #define __hard_irq_disable() wrtee(0) +#define __hard_EE_RI_disable() wrtee(0) +#define __hard_RI_enable() do { } while (0) #else #define __hard_irq_enable() __mtmsrd(MSR_EE|MSR_RI, 1) #define __hard_irq_disable() __mtmsrd(MSR_RI, 1) +#define __hard_EE_RI_disable() __mtmsrd(0, 1) +#define __hard_RI_enable() __mtmsrd(MSR_RI, 1) #endif #define hard_irq_disable() do { \ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index ee3ada66deb5..082a40153b94 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -138,6 +138,9 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif +long do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_leave(struct pt_regs *regs); + #define kernel_stack_pointer(regs) ((regs)->gpr[1]) static inline int is_syscall_success(struct pt_regs *regs) { diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index 0803ca8b9149..99e1c6de27bc 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -6,4 +6,7 @@ #include #include +struct pt_regs; +void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); + #endif /* _ASM_POWERPC_SIGNAL_H */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 5b03d8a82409..476008bc3d08 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -5,6 +5,7 @@ #ifndef _ASM_POWERPC_SWITCH_TO_H #define _ASM_POWERPC_SWITCH_TO_H +#include #include struct thread_struct; @@ -22,6 +23,10 @@ extern void switch_booke_debug_regs(struct debug_reg *new_debug); extern int emulate_altivec(struct pt_regs *); +void restore_math(struct 
pt_regs *regs); + +void restore_tm_state(struct pt_regs *regs); + extern void flush_all_to_thread(struct task_struct *); extern void giveup_all(struct task_struct *); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index e0107495c4de..39ce95016a3a 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -194,5 +194,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb); /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); +/* SPLPAR */ +void accumulate_stolen_time(void); + #endif /* __KERNEL__ */ #endif /* __POWERPC_TIME_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 78a1b22d4fd8..5700231a8988 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -50,7 +50,8 @@ obj-y := cputable.o ptrace.o syscalls.o \ of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ - paca.o nvram_64.o firmware.o note.o + paca.o nvram_64.o firmware.o note.o \ + syscall_64.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 14afe12eae8c..5f70830b5ae4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -69,6 +69,7 @@ BEGIN_FTR_SECTION bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif +_ASM_NOKPROBE_SYMBOL(system_call_common) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -76,341 +77,98 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) + std r2,GPR2(r1) #ifdef CONFIG_PPC_FSL_BOOK3E START_BTB_FLUSH_SECTION BTB_FLUSH(r10) END_BTB_FLUSH_SECTION #endif - ACCOUNT_CPU_USER_ENTRY(r13, r10, r11) - std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? 
*/ std r3,GPR3(r1) - mfcr r2 std r4,GPR4(r1) std r5,GPR5(r1) std r6,GPR6(r1) std r7,GPR7(r1) std r8,GPR8(r1) - li r11,0 + /* Zero r9-r12, this should only be required when restoring all GPRs */ std r11,GPR9(r1) std r11,GPR10(r1) std r11,GPR11(r1) std r11,GPR12(r1) - std r11,_XER(r1) - std r11,_CTR(r1) std r9,GPR13(r1) SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_CTR(r1) mflr r10 + /* * This clears CR0.SO (bit 28), which is the error indication on * return from this system call. */ - rldimi r2,r11,28,(63-28) + rldimi r12,r11,28,(63-28) li r11,0xc00 std r10,_LINK(r1) std r11,_TRAP(r1) + std r12,_CCR(r1) std r3,ORIG_GPR3(r1) - std r2,_CCR(r1) - ld r2,PACATOC(r13) - addi r9,r1,STACK_FRAME_OVERHEAD + addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) - std r11,-16(r9) /* "regshere" marker */ - - kuap_check_amr r10, r11 - -#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) -BEGIN_FW_FTR_SECTION - /* see if there are any DTL entries to process */ - ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ - ld r11,PACA_DTL_RIDX(r13) /* get log read index */ - addi r10,r10,LPPACA_DTLIDX - LDX_BE r10,0,r10 /* get log write index */ - cmpd r11,r10 - beq+ 33f - bl accumulate_stolen_time - REST_GPR(0,r1) - REST_4GPRS(3,r1) - REST_2GPRS(7,r1) - addi r9,r1,STACK_FRAME_OVERHEAD -33: -END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */ - - /* - * A syscall should always be called with interrupts enabled - * so we just unconditionally hard-enable here. 
When some kind - * of irq tracing is used, we additionally check that condition - * is correct - */ -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - lbz r10,PACAIRQSOFTMASK(r13) -1: tdnei r10,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - -#ifdef CONFIG_PPC_BOOK3E - wrteei 1 -#else - li r11,MSR_RI - ori r11,r11,MSR_EE - mtmsrd r11,1 -#endif /* CONFIG_PPC_BOOK3E */ - -system_call: /* label this so stack traces look sane */ - /* We do need to set SOFTE in the stack frame or the return - * from interrupt will be painful - */ - li r10,IRQS_ENABLED - std r10,SOFTE(r1) - - ld r11, PACA_THREAD_INFO(r13) - ld r10,TI_FLAGS(r11) - andi. r11,r10,_TIF_SYSCALL_DOTRACE - bne .Lsyscall_dotrace /* does not return */ - cmpldi 0,r0,NR_syscalls - bge- .Lsyscall_enosys + std r11,-16(r10) /* "regshere" marker */ -.Lsyscall: -/* - * Need to vector to 32 Bit or default sys_call_table here, - * based on caller's run-mode / personality. - */ - ld r11,SYS_CALL_TABLE@toc(2) - andis. r10,r10,_TIF_32BIT@h - beq 15f - ld r11,COMPAT_SYS_CALL_TABLE@toc(2) - clrldi r3,r3,32 - clrldi r4,r4,32 - clrldi r5,r5,32 - clrldi r6,r6,32 - clrldi r7,r7,32 - clrldi r8,r8,32 -15: - slwi r0,r0,3 - - barrier_nospec_asm - /* - * Prevent the load of the handler below (based on the user-passed - * system call number) being speculatively executed until the test - * against NR_syscalls and branch to .Lsyscall_enosys above has - * committed. 
- */ - - ldx r12,r11,r0 /* Fetch system call handler [ptr] */ - mtctr r12 - bctrl /* Call handler */ + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception - /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */ .Lsyscall_exit: - std r3,RESULT(r1) - -#ifdef CONFIG_DEBUG_RSEQ - /* Check whether the syscall is issued inside a restartable sequence */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl rseq_syscall - ld r3,RESULT(r1) -#endif - - ld r12, PACA_THREAD_INFO(r13) - - ld r8,_MSR(r1) - -/* - * This is a few instructions into the actual syscall exit path (which actually - * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the - * number of visible symbols for profiling purposes. - * - * We can probe from system_call until this point as MSR_RI is set. But once it - * is cleared below, we won't be able to take a trap. - * - * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL(). - */ -system_call_exit: - /* - * Disable interrupts so current_thread_info()->flags can't change, - * and so that we don't get interrupted after loading SRR0/1. - * - * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we - * could fault on the load of the TI_FLAGS below. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r11,MSR_RI - mtmsrd r11,1 -#endif /* CONFIG_PPC_BOOK3E */ + addi r4,r1,STACK_FRAME_OVERHEAD + bl syscall_exit_prepare - ld r9,TI_FLAGS(r12) - li r11,-MAX_ERRNO - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) - bne- .Lsyscall_exit_work + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + ld r6,_LINK(r1) - andi. r0,r8,MSR_FP - beq 2f -#ifdef CONFIG_ALTIVEC - andis. 
r0,r8,MSR_VEC@h - bne 3f -#endif -2: addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_math - ld r8,_MSR(r1) - ld r3,RESULT(r1) - li r11,-MAX_ERRNO - -3: cmpld r3,r11 - ld r5,_CCR(r1) - bge- .Lsyscall_error -.Lsyscall_error_cont: - ld r7,_NIP(r1) BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - andi. r6,r8,MSR_PR - ld r4,_LINK(r1) - kuap_check_amr r10, r11 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + mtlr r6 -#ifdef CONFIG_PPC_BOOK3S - /* - * Clear MSR_RI, MSR_EE is already and remains disabled. We could do - * this later, but testing shows that doing it here causes less slow - * down than doing it closer to the rfid. - */ - li r11,0 - mtmsrd r11,1 -#endif - - beq- 1f - ACCOUNT_CPU_USER_EXIT(r13, r11, r12) + cmpdi r3,0 + bne .Lsyscall_restore_regs +.Lsyscall_restore_regs_cont: BEGIN_FTR_SECTION HMT_MEDIUM_LOW END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - std r8, PACATMSCRATCH(r13) -#endif - /* * We don't need to restore AMR on the way back to userspace for KUAP. * The value of AMR only matters while we're in the kernel. */ - ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + mtcr r2 ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) ld r1,GPR1(r1) - mtlr r4 - mtcr r5 - mtspr SPRN_SRR0,r7 - mtspr SPRN_SRR1,r8 RFI_TO_USER b . /* prevent speculative execution */ -1: /* exit to kernel */ - kuap_restore_amr r2 - - ld r2,GPR2(r1) - ld r1,GPR1(r1) - mtlr r4 - mtcr r5 - mtspr SPRN_SRR0,r7 - mtspr SPRN_SRR1,r8 - RFI_TO_KERNEL - b . /* prevent speculative execution */ - -.Lsyscall_error: - oris r5,r5,0x1000 /* Set SO bit in CR */ - neg r3,r3 - std r5,_CCR(r1) - b .Lsyscall_error_cont - -/* Traced system call support */ -.Lsyscall_dotrace: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_enter - - /* - * We use the return value of do_syscall_trace_enter() as the syscall - * number. 
If the syscall was rejected for any reason do_syscall_trace_enter() - * returns an invalid syscall number and the test below against - * NR_syscalls will fail. - */ - mr r0,r3 - - /* Restore argument registers just clobbered and/or possibly changed. */ - ld r3,GPR3(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) - - /* Repopulate r9 and r10 for the syscall path */ - addi r9,r1,STACK_FRAME_OVERHEAD - ld r10, PACA_THREAD_INFO(r13) - ld r10,TI_FLAGS(r10) - - cmpldi r0,NR_syscalls - blt+ .Lsyscall - - /* Return code is already in r3 thanks to do_syscall_trace_enter() */ - b .Lsyscall_exit - - -.Lsyscall_enosys: - li r3,-ENOSYS - b .Lsyscall_exit - -.Lsyscall_exit_work: - /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. - If TIF_NOERROR is set, just save r3 as it is. */ - - andi. r0,r9,_TIF_RESTOREALL - beq+ 0f +.Lsyscall_restore_regs: + ld r3,_CTR(r1) + ld r4,_XER(r1) REST_NVGPRS(r1) - b 2f -0: cmpld r3,r11 /* r11 is -MAX_ERRNO */ - blt+ 1f - andi. r0,r9,_TIF_NOERROR - bne- 1f - ld r5,_CCR(r1) - neg r3,r3 - oris r5,r5,0x1000 /* Set SO bit in CR */ - std r5,_CCR(r1) -1: std r3,GPR3(r1) -2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) - beq 4f - - /* Clear per-syscall TIF flags if any are set. */ - - li r11,_TIF_PERSYSCALL_MASK - addi r12,r12,TI_FLAGS -3: ldarx r10,0,r12 - andc r10,r10,r11 - stdcx. r10,0,r12 - bne- 3b - subi r12,r12,TI_FLAGS - -4: /* Anything else left to do? */ -BEGIN_FTR_SECTION - lis r3,DEFAULT_PPR@highest /* Set default PPR */ - sldi r3,r3,32 /* bits 11-13 are used for ppr */ - std r3,_PPR(r1) -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - andi. 
r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) - beq ret_from_except_lite - - /* Re-enable interrupts */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 1 -#else - li r10,MSR_RI - ori r10,r10,MSR_EE - mtmsrd r10,1 -#endif /* CONFIG_PPC_BOOK3E */ - - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_leave - b ret_from_except + mtctr r3 + mtspr SPRN_XER,r4 + ld r0,GPR0(r1) + REST_8GPRS(4, r1) + ld r12,GPR12(r1) + b .Lsyscall_restore_regs_cont #ifdef CONFIG_PPC_TRANSACTIONAL_MEM .Ltabort_syscall: @@ -438,8 +196,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) RFI_TO_USER b . /* prevent speculative execution */ #endif -_ASM_NOKPROBE_SYMBOL(system_call_common); -_ASM_NOKPROBE_SYMBOL(system_call_exit); _GLOBAL(ret_from_fork) bl schedule_tail diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 800433685888..d396efca4068 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -10,8 +10,6 @@ #ifndef _POWERPC_ARCH_SIGNAL_H #define _POWERPC_ARCH_SIGNAL_H -extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); - extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32); diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c new file mode 100644 index 000000000000..75be20fdb270 --- /dev/null +++ b/arch/powerpc/kernel/syscall_64.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef long (*syscall_fn)(long, long, long, long, long, long); + +/* Has to run notrace because it is entered "unreconciled" */ +notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs) +{ + unsigned long ti_flags; + syscall_fn f; + + BUG_ON(!(regs->msr & MSR_PR)); + + account_cpu_user_entry(); + +#ifdef CONFIG_PPC_SPLPAR + if 
(IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && + firmware_has_feature(FW_FEATURE_SPLPAR)) { + struct lppaca *lp = local_paca->lppaca_ptr; + + if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx))) + accumulate_stolen_time(); + } +#endif + + kuap_check_amr(); + + /* + * A syscall should always be called with interrupts enabled + * so we just unconditionally hard-enable here. When some kind + * of irq tracing is used, we additionally check that condition + * is correct + */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(irq_soft_mask_return() != IRQS_ENABLED); + WARN_ON(local_paca->irq_happened); + } + /* + * This is not required for the syscall exit path, but makes the + * stack frame look nicer. If this was initialised in the first stack + * frame, or if the unwinder was taught the first stack frame always + * returns to user with IRQS_ENABLED, this store could be avoided! + */ + regs->softe = IRQS_ENABLED; + + __hard_irq_enable(); + + ti_flags = current_thread_info()->flags; + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + /* + * We use the return value of do_syscall_trace_enter() as the + * syscall number. If the syscall was rejected for any reason + * do_syscall_trace_enter() returns an invalid syscall number + * and the test against NR_syscalls will fail and the return + * value to be used is in regs->gpr[3]. + */ + r0 = do_syscall_trace_enter(regs); + if (unlikely(r0 >= NR_syscalls)) + return regs->gpr[3]; + r3 = regs->gpr[3]; + r4 = regs->gpr[4]; + r5 = regs->gpr[5]; + r6 = regs->gpr[6]; + r7 = regs->gpr[7]; + r8 = regs->gpr[8]; + + } else if (unlikely(r0 >= NR_syscalls)) { + return -ENOSYS; + } + + /* May be faster to do array_index_nospec? 
*/ + barrier_nospec(); + + if (unlikely(ti_flags & _TIF_32BIT)) { + f = (void *)compat_sys_call_table[r0]; + + r3 &= 0x00000000ffffffffULL; + r4 &= 0x00000000ffffffffULL; + r5 &= 0x00000000ffffffffULL; + r6 &= 0x00000000ffffffffULL; + r7 &= 0x00000000ffffffffULL; + r8 &= 0x00000000ffffffffULL; + + } else { + f = (void *)sys_call_table[r0]; + } + + return f(r3, r4, r5, r6, r7, r8); +} + +/* + * This should be called after a syscall returns, with r3 the return value + * from the syscall. If this function returns non-zero, the system call + * exit assembly should additionally load all GPR registers and CTR and XER + * from the interrupt frame. + * + * The function graph tracer can not trace the return side of this function, + * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. + */ +notrace unsigned long syscall_exit_prepare(unsigned long r3, + struct pt_regs *regs) +{ + unsigned long *ti_flagsp = ¤t_thread_info()->flags; + unsigned long ti_flags; + unsigned long ret = 0; + + regs->result = r3; + + /* Check whether the syscall is issued inside a restartable sequence */ + rseq_syscall(regs); + + ti_flags = *ti_flagsp; + + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) { + if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { + r3 = -r3; + regs->ccr |= 0x10000000; /* Set SO bit in CR */ + } + } + + if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { + if (ti_flags & _TIF_RESTOREALL) + ret = _TIF_RESTOREALL; + else + regs->gpr[3] = r3; + clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp); + } else { + regs->gpr[3] = r3; + } + + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + do_syscall_trace_leave(regs); + ret |= _TIF_RESTOREALL; + } + +again: + local_irq_disable(); + ti_flags = READ_ONCE(*ti_flagsp); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); + if (ti_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + /* + * SIGPENDING must restore signal handler function + * argument GPRs, and some 
non-volatiles (e.g., r1). + * Restore all for now. This could be made lighter. + */ + if (ti_flags & _TIF_SIGPENDING) + ret |= _TIF_RESTOREALL; + do_notify_resume(regs, ti_flags); + } + local_irq_disable(); + ti_flags = READ_ONCE(*ti_flagsp); + } + + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely((ti_flags & _TIF_RESTORE_TM))) { + restore_tm_state(regs); + } else { + unsigned long mathflags = MSR_FP; + + if (cpu_has_feature(CPU_FTR_VSX)) + mathflags |= MSR_VEC | MSR_VSX; + else if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mathflags |= MSR_VEC; + + if ((regs->msr & mathflags) != mathflags) + restore_math(regs); + } + } + + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + + /* This pattern matches prep_irq_for_idle */ + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending())) { + __hard_RI_enable(); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + /* Took an interrupt which may have more exit work to do. 
*/ + goto again; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + kuap_check_amr(); + + account_cpu_user_exit(); + + return ret; +} diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 5b905a2f4e4d..d34276f3c495 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -16,25 +16,22 @@ #ifdef CONFIG_PPC64 .p2align 3 +#define __SYSCALL(nr, entry) .8byte entry +#else +#define __SYSCALL(nr, entry) .long entry #endif .globl sys_call_table sys_call_table: #ifdef CONFIG_PPC64 -#define __SYSCALL(nr, entry) .8byte DOTSYM(entry) #include -#undef __SYSCALL #else -#define __SYSCALL(nr, entry) .long entry #include -#undef __SYSCALL #endif #ifdef CONFIG_COMPAT .globl compat_sys_call_table compat_sys_call_table: #define compat_sys_sigsuspend sys_sigsuspend -#define __SYSCALL(nr, entry) .8byte DOTSYM(entry) #include -#undef __SYSCALL #endif -- cgit v1.2.3-59-g8ed1b From 993c670a4dfb2bded39d96f11df763c657d15a41 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:35 +1000 Subject: powerpc/64/syscall: Zero volatile registers when returning Kernel addresses and potentially other sensitive data could be leaked in volatile registers after a syscall. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-27-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5f70830b5ae4..29949bbe857b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -141,6 +141,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) cmpdi r3,0 bne .Lsyscall_restore_regs + /* Zero volatile regs that may contain sensitive kernel data */ + li r0,0 + li r4,0 + li r5,0 + li r6,0 + li r7,0 + li r8,0 + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtctr r0 + mtspr SPRN_XER,r0 .Lsyscall_restore_regs_cont: BEGIN_FTR_SECTION -- cgit v1.2.3-59-g8ed1b From 3282a3da25bd63fdb7240bc35dbdefa4b1947005 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:36 +1000 Subject: powerpc/64: Implement soft interrupt replay in C When local_irq_enable() finds a pending soft-masked interrupt, it "replays" it by setting up registers like the initial interrupt entry, then calls into the low level handler to set up an interrupt stack frame and process the interrupt. This is not necessary, and uses more stack than needed. The high level interrupt handler can be called directly from C, with just pt_regs set up on stack. This should be faster and use less stack. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-28-npiggin@gmail.com --- arch/powerpc/include/asm/hw_irq.h | 1 - arch/powerpc/kernel/exceptions-64e.S | 32 ------- arch/powerpc/kernel/exceptions-64s.S | 47 ---------- arch/powerpc/kernel/irq.c | 165 +++++++++++++++++++++++++++-------- 4 files changed, 130 insertions(+), 115 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 310583e62bd9..0e9a9598f91f 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -52,7 +52,6 @@ #ifndef __ASSEMBLY__ extern void replay_system_reset(void); -extern void __replay_interrupt(unsigned int vector); extern void timer_interrupt(struct pt_regs *); extern void timer_broadcast_interrupt(void); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e4076e3c072d..4efac5490216 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1002,38 +1002,6 @@ masked_interrupt_book3e_0x280: masked_interrupt_book3e_0x2c0: masked_interrupt_book3e PACA_IRQ_DBELL 0 -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280 - * to indicate the kind of interrupt. MSR:EE is already off. - * We generate a stackframe like if a real interrupt had happened. - * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about. 
- */ - mflr r10 - mfmsr r11 - mfcr r4 - mtspr SPRN_SPRG_GEN_SCRATCH,r13; - std r1,PACA_EXGEN+EX_R1(r13); - stw r4,PACA_EXGEN+EX_CR(r13); - ori r11,r11,MSR_EE - subi r1,r1,INT_FRAME_SIZE; - cmpwi cr0,r3,0x500 - beq exc_0x500_common - cmpwi cr0,r3,0x900 - beq exc_0x900_common - cmpwi cr0,r3,0x280 - beq exc_0x280_common - blr - - /* * This is called from 0x300 and 0x400 handlers after the prologs with * r14 and r15 containing the fault address and error code, with the diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d75df223da7c..d6536a7c2a01 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -3165,50 +3165,3 @@ doorbell_super_common_msgclr: LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) PPC_MSGCLRP(3) b doorbell_super_common_virt - -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate - * which kind of interrupt. MSR:EE is already off. We generate a - * stackframe like if a real interrupt had happened. - * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - * - * Note that we don't specify LR as the NIP (return address) for - * the interrupt because that would unbalance the return branch - * predictor. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about, so we don't bother storing them. 
- */ - mfmsr r12 - LOAD_REG_ADDR(r11, replay_interrupt_return) - mfcr r9 - ori r12,r12,MSR_EE - cmpwi r3,0x900 - beq decrementer_common_virt - cmpwi r3,0x500 -BEGIN_FTR_SECTION - beq h_virt_irq_common_virt -FTR_SECTION_ELSE - beq hardware_interrupt_common_virt -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300) - cmpwi r3,0xf00 - beq performance_monitor_common_virt -BEGIN_FTR_SECTION - cmpwi r3,0xa00 - beq h_doorbell_common_msgclr - cmpwi r3,0xe60 - beq hmi_exception_common_virt -FTR_SECTION_ELSE - cmpwi r3,0xa00 - beq doorbell_super_common_msgclr -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -replay_interrupt_return: - blr - -_ASM_NOKPROBE_SYMBOL(__replay_interrupt) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1bed18b7229e..2e5dca87b936 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -70,6 +70,7 @@ #include #include #include +#include #endif #define CREATE_TRACE_POINTS #include @@ -230,10 +231,121 @@ notrace unsigned int __check_irq_replay(void) return 0; } +static void replay_soft_interrupts(void) +{ + /* + * We use local_paca rather than get_paca() to avoid all + * the debug_smp_processor_id() business in this low level + * function + */ + unsigned char happened = local_paca->irq_happened; + struct pt_regs regs; + + ppc_save_regs(&regs); + regs.softe = IRQS_ALL_DISABLED; + +again: + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + if (happened & PACA_IRQ_HARD_DIS) { + /* + * We may have missed a decrementer interrupt if hard disabled. + * Check the decrementer register in case we had a rollover + * while hard disabled. + */ + if (!(happened & PACA_IRQ_DEC)) { + if (decrementer_check_overflow()) + happened |= PACA_IRQ_DEC; + } + } + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. 
+ */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); + } + + /* + * Check if an hypervisor Maintenance interrupt happened. + * This is a higher priority interrupt than the others, so + * replay it first. + */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) { + local_paca->irq_happened &= ~PACA_IRQ_HMI; + regs.trap = 0xe60; + handle_hmi_exception(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (happened & PACA_IRQ_DEC) { + local_paca->irq_happened &= ~PACA_IRQ_DEC; + regs.trap = 0x900; + timer_interrupt(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (happened & PACA_IRQ_EE) { + local_paca->irq_happened &= ~PACA_IRQ_EE; + regs.trap = 0x500; + do_IRQ(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + /* + * Check if an EPR external interrupt happened this bit is typically + * set if we need to handle another "edge" interrupt from within the + * MPIC "EPR" handler. 
+ */ + if (IS_ENABLED(CONFIG_PPC_BOOK3E) && (happened & PACA_IRQ_EE_EDGE)) { + local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE; + regs.trap = 0x500; + do_IRQ(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) { + local_paca->irq_happened &= ~PACA_IRQ_DBELL; + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) + regs.trap = 0x280; + else + regs.trap = 0xa00; + doorbell_exception(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + /* Book3E does not support soft-masking PMI interrupts */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + regs.trap = 0xf00; + performance_monitor_exception(&regs); + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) + hard_irq_disable(); + } + + happened = local_paca->irq_happened; + if (happened & ~PACA_IRQ_HARD_DIS) { + /* + * We are responding to the next interrupt, so interrupt-off + * latencies should be reset here. + */ + trace_hardirqs_on(); + trace_hardirqs_off(); + goto again; + } +} + notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; - unsigned int replay; /* Write the new soft-enabled value */ irq_soft_mask_set(mask); @@ -255,24 +367,16 @@ notrace void arch_local_irq_restore(unsigned long mask) */ irq_happened = get_irq_happened(); if (!irq_happened) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); return; } - /* - * We need to hard disable to get a trusted value from - * __check_irq_replay(). We also need to soft-disable - * again to avoid warnings in there due to the use of - * per-cpu variables. - */ + /* We need to hard disable to replay. 
*/ if (!(irq_happened & PACA_IRQ_HARD_DIS)) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG } else { /* * We should already be hard disabled here. We had bugs @@ -280,35 +384,26 @@ notrace void arch_local_irq_restore(unsigned long mask) * warn if we are wrong. Only do that when IRQ tracing * is enabled as mfmsr() can be costly. */ - if (WARN_ON_ONCE(mfmsr() & MSR_EE)) - __hard_irq_disable(); -#endif + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + + if (irq_happened == PACA_IRQ_HARD_DIS) { + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } } irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); - /* - * Check if anything needs to be re-emitted. We haven't - * soft-enabled yet to avoid warnings in decrementer_check_overflow - * accessing per-cpu variables - */ - replay = __check_irq_replay(); + replay_soft_interrupts(); + local_paca->irq_happened = 0; - /* We can soft-enable now */ trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); - - /* - * And replay if we have to. This will return with interrupts - * hard-enabled. - */ - if (replay) { - __replay_interrupt(replay); - return; - } - - /* Finally, let's ensure we are hard enabled */ __hard_irq_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); -- cgit v1.2.3-59-g8ed1b From 6cc0c16d82f889f0083f3608237189afb55b67be Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:37 +1000 Subject: powerpc/64s: Implement interrupt exit logic in C Implement the bulk of interrupt return logic in C. The asm return code must handle a few cases: restoring full GPRs, and emulating stack store. 
The stack store emulation is significantly simplified, rather than creating a new return frame and switching to that before performing the store, it uses the PACA to keep a scratch register around to perform the store. The asm return code is moved into 64e for now. The new logic has made allowance for 64e, but I don't have a full environment that works well to test it, and even booting in emulated qemu is not great for stress testing. 64e shouldn't be too far off working with this, given a bit more testing and auditing of the logic. This is slightly faster on a POWER9 (page fault speed increases about 1.1%), probably due to reduced mtmsrd. mpe: Includes fixes from Nick for _TIF_EMULATE_STACK_STORE handling (including the fast_interrupt_return path), to remove trace_hardirqs_on(), and fixes the interrupt-return part of the MSR_VSX restore bug caught by tm-unavailable selftest. mpe: Incorporate fix from Nick: The return-to-kernel path has to replay any soft-pending interrupts if it is returning to a context that had interrupts soft-enabled. It has to do this carefully and avoid plain enabling interrupts if this is an irq context, which can cause multiple nesting of interrupts on the stack, and other unexpected issues. The code which avoided this case got the soft-mask state wrong, and marked interrupts as enabled before going around again to retry. 
This seems to be mostly harmless except when PREEMPT=y, this calls preempt_schedule_irq with irqs apparently enabled and runs into a BUG in kernel/sched/core.c Signed-off-by: Nicholas Piggin Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-29-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 2 + arch/powerpc/include/asm/book3s/64/kup-radix.h | 10 + arch/powerpc/include/asm/hw_irq.h | 1 + arch/powerpc/include/asm/switch_to.h | 6 + arch/powerpc/kernel/entry_64.S | 487 ++++++------------------- arch/powerpc/kernel/exceptions-64e.S | 255 ++++++++++++- arch/powerpc/kernel/exceptions-64s.S | 119 +++--- arch/powerpc/kernel/irq.c | 36 +- arch/powerpc/kernel/process.c | 89 ++--- arch/powerpc/kernel/syscall_64.c | 171 ++++++++- arch/powerpc/kernel/vector.S | 2 +- 11 files changed, 652 insertions(+), 526 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index ab59a4904254..7d81e86a1e5d 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -99,6 +99,8 @@ void __init machine_init(u64 dt_ptr); #endif long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr); +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr); long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 71081d90f999..3bcef989a35d 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -60,6 +60,12 @@ #include #include +static 
inline void kuap_restore_amr(struct pt_regs *regs) +{ + if (mmu_has_feature(MMU_FTR_RADIX_KUAP)) + mtspr(SPRN_AMR, regs->kuap); +} + static inline void kuap_check_amr(void) { if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_RADIX_KUAP)) @@ -136,6 +142,10 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); } #else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs) +{ +} + static inline void kuap_check_amr(void) { } diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 0e9a9598f91f..e0e71777961f 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -52,6 +52,7 @@ #ifndef __ASSEMBLY__ extern void replay_system_reset(void); +extern void replay_soft_interrupts(void); extern void timer_interrupt(struct pt_regs *); extern void timer_broadcast_interrupt(void); diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 476008bc3d08..b867b58b1093 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -23,7 +23,13 @@ extern void switch_booke_debug_regs(struct debug_reg *new_debug); extern int emulate_altivec(struct pt_regs *); +#ifdef CONFIG_PPC_BOOK3S_64 void restore_math(struct pt_regs *regs); +#else +static inline void restore_math(struct pt_regs *regs) +{ +} +#endif void restore_tm_state(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 29949bbe857b..5d782acb86d4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -228,6 +229,7 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit +#ifdef CONFIG_PPC_BOOK3E /* Save non-volatile GPRs, if not already saved. 
*/ _GLOBAL(save_nvgprs) ld r11,_TRAP(r1) @@ -238,6 +240,7 @@ _GLOBAL(save_nvgprs) std r0,_TRAP(r1) blr _ASM_NOKPROBE_SYMBOL(save_nvgprs); +#endif #ifdef CONFIG_PPC_BOOK3S_64 @@ -301,7 +304,7 @@ flush_count_cache: * state of one is saved on its kernel stack. Then the state * of the other is restored from its kernel stack. The memory * management hardware is updated to the second process's state. - * Finally, we can return to the second process, via ret_from_except. + * Finally, we can return to the second process, via interrupt_return. * On entry, r3 points to the THREAD for the current task, r4 * points to the THREAD for the new task. * @@ -453,408 +456,152 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr - .align 7 -_GLOBAL(ret_from_except) - ld r11,_TRAP(r1) - andi. r0,r11,1 - bne ret_from_except_lite - REST_NVGPRS(r1) - -_GLOBAL(ret_from_except_lite) +#ifdef CONFIG_PPC_BOOK3S /* - * Disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r10,MSR_RI - mtmsrd r10,1 /* Update machine state */ -#endif /* CONFIG_PPC_BOOK3E */ + * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not + * touched, AMR not set, no exit work created, then this can be used. + */ + .balign IFETCH_ALIGN_BYTES + .globl fast_interrupt_return +fast_interrupt_return: +_ASM_NOKPROBE_SYMBOL(fast_interrupt_return) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + bne .Lfast_user_interrupt_return + andi. r0,r4,MSR_RI + li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ + bne+ .Lfast_kernel_interrupt_return + addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b . /* should not get here */ - ld r9, PACA_THREAD_INFO(r13) - ld r3,_MSR(r1) -#ifdef CONFIG_PPC_BOOK3E - ld r10,PACACURRENT(r13) -#endif /* CONFIG_PPC_BOOK3E */ - ld r4,TI_FLAGS(r9) - andi. 
r3,r3,MSR_PR - beq resume_kernel -#ifdef CONFIG_PPC_BOOK3E - lwz r3,(THREAD+THREAD_DBCR0)(r10) -#endif /* CONFIG_PPC_BOOK3E */ + .balign IFETCH_ALIGN_BYTES + .globl interrupt_return +interrupt_return: +_ASM_NOKPROBE_SYMBOL(interrupt_return) + REST_NVGPRS(r1) - /* Check current_thread_info()->flags */ - andi. r0,r4,_TIF_USER_WORK_MASK - bne 1f -#ifdef CONFIG_PPC_BOOK3E - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - andis. r0,r3,DBCR0_IDM@h - beq restore - mfmsr r0 - rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ - mtmsr r0 - mtspr SPRN_DBCR0,r3 - li r10, -1 - mtspr SPRN_DBSR,r10 - b restore -#else - addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_math - b restore -#endif -1: andi. r0,r4,_TIF_NEED_RESCHED - beq 2f - bl restore_interrupts - SCHEDULE_USER - b ret_from_except_lite -2: -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM - bne 3f /* only restore TM if nothing else to do */ + .balign IFETCH_ALIGN_BYTES + .globl interrupt_return_lite +interrupt_return_lite: +_ASM_NOKPROBE_SYMBOL(interrupt_return_lite) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + beq .Lkernel_interrupt_return addi r3,r1,STACK_FRAME_OVERHEAD - bl restore_tm_state - b restore -3: -#endif - bl save_nvgprs - /* - * Use a non volatile GPR to save and restore our thread_info flags - * across the call to restore_interrupts. - */ - mr r30,r4 - bl restore_interrupts - mr r4,r30 - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_notify_resume - b ret_from_except - -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - andis. 
r8,r4,_TIF_EMULATE_STACK_STORE@h - beq+ 1f + bl interrupt_exit_user_prepare + cmpdi r3,0 + bne- .Lrestore_nvgprs - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ +.Lfast_user_interrupt_return: + ld r11,_NIP(r1) + ld r12,_MSR(r1) +BEGIN_FTR_SECTION + ld r10,_PPR(r1) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 - ld r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - /* Copy from the original to the trampoline. */ - li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ - mtctr r5 -2: ldx r0,r6,r4 - stdx r0,r6,r3 - addi r6,r6,8 - bdnz 2b - - /* Do real store operation to complete stdu */ - ld r5,GPR1(r1) - std r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r9,TI_FLAGS -0: ldarx r4,0,r5 - andc r4,r4,r11 - stdcx. r4,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPTION - /* Check if we need to preempt */ - andi. r0,r4,_TIF_NEED_RESCHED - beq+ restore - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr0,r8,0 - bne restore - ld r0,SOFTE(r1) - andi. r0,r0,IRQS_DISABLED - bne restore + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + li r0,0 - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first and reconcile irq state. 
- */ - RECONCILE_IRQ_STATE(r3,r4) - bl preempt_schedule_irq + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + REST_GPR(13, r1) - /* - * arch_local_irq_restore() from preempt_schedule_irq above may - * enable hard interrupt but we really should disable interrupts - * when we return from the interrupt, and so that we don't get - * interrupted after loading SRR0/1. - */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - li r10,MSR_RI - mtmsrd r10,1 /* Update machine state */ -#endif /* CONFIG_PPC_BOOK3E */ -#endif /* CONFIG_PREEMPTION */ + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 - .globl fast_exc_return_irq -fast_exc_return_irq: -restore: - /* - * This is the main kernel exit path. First we check if we - * are about to re-enable interrupts - */ - ld r5,SOFTE(r1) - lbz r6,PACAIRQSOFTMASK(r13) - andi. r5,r5,IRQS_DISABLED - bne .Lrestore_irq_off + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + RFI_TO_USER + b . /* prevent speculative execution */ - /* We are enabling, were we already enabled ? Yes, just return */ - andi. r6,r6,IRQS_DISABLED - beq cr0,.Ldo_restore +.Lrestore_nvgprs: + REST_NVGPRS(r1) + b .Lfast_user_interrupt_return - /* - * We are about to soft-enable interrupts (we are hard disabled - * at this point). We check if there's anything that needs to - * be replayed first. - */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- .Lrestore_check_irq_replay + .balign IFETCH_ALIGN_BYTES +.Lkernel_interrupt_return: + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_kernel_prepare - /* - * Get here when nothing happened while soft-disabled, just - * soft-enable and move-on. We will hard-enable as a side - * effect of rfi - */ -.Lrestore_no_replay: - TRACE_ENABLE_INTS - li r0,IRQS_ENABLED - stb r0,PACAIRQSOFTMASK(r13); +.Lfast_kernel_interrupt_return: + cmpdi cr1,r3,0 + ld r11,_NIP(r1) + ld r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 - /* - * Final return path. 
BookE is handled in a different file - */ -.Ldo_restore: -#ifdef CONFIG_PPC_BOOK3E - b exception_return_book3e -#else - /* - * Clear the reservation. If we know the CPU tracks the address of - * the reservation then we can potentially save some cycles and use - * a larx. On POWER6 and POWER7 this is significantly faster. - */ BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ FTR_SECTION_ELSE - ldarx r4,0,r1 + ldarx r0,0,r1 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - /* - * Some code path such as load_up_fpu or altivec return directly - * here. They run entirely hard disabled and do not alter the - * interrupt state. They also don't use lwarx/stwcx. and thus - * are known not to leave dangling reservations. - */ - .globl fast_exception_return -fast_exception_return: - ld r3,_MSR(r1) + ld r3,_LINK(r1) ld r4,_CTR(r1) - ld r0,_LINK(r1) - mtctr r4 - mtlr r0 - ld r4,_XER(r1) - mtspr SPRN_XER,r4 - - kuap_check_amr r5, r6 - - REST_8GPRS(5, r1) - - andi. r0,r3,MSR_RI - beq- .Lunrecov_restore - - /* - * Clear RI before restoring r13. If we are returning to - * userspace and we take an exception after restoring r13, - * we end up corrupting the userspace r13 value. - */ - li r4,0 - mtmsrd r4,1 - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* TM debug */ - std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */ -#endif - /* - * r13 is our per cpu area, only restore it if we are returning to - * userspace the value stored in the stack frame may belong to - * another CPU. - */ - andi. r0,r3,MSR_PR - beq 1f -BEGIN_FTR_SECTION - /* Restore PPR */ - ld r2,_PPR(r1) - mtspr SPRN_PPR,r2 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - ACCOUNT_CPU_USER_EXIT(r13, r2, r4) - REST_GPR(13, r1) - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. 
- */ - mtspr SPRN_SRR1,r3 - - ld r2,_CCR(r1) - mtcrf 0xFF,r2 - ld r2,_NIP(r1) - mtspr SPRN_SRR0,r2 - - ld r0,GPR0(r1) - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r4,GPR4(r1) - ld r1,GPR1(r1) - RFI_TO_USER - b . /* prevent speculative execution */ + ld r5,_XER(r1) + ld r6,_CCR(r1) + li r0,0 -1: mtspr SPRN_SRR1,r3 + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) - ld r2,_CCR(r1) - mtcrf 0xFF,r2 - ld r2,_NIP(r1) - mtspr SPRN_SRR0,r2 + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 /* * Leaving a stale exception_marker on the stack can confuse * the reliable stack unwinder later on. Clear it. */ - li r2,0 - std r2,STACK_FRAME_OVERHEAD-16(r1) + std r0,STACK_FRAME_OVERHEAD-16(r1) - ld r0,GPR0(r1) - ld r2,GPR2(r1) - ld r3,GPR3(r1) + REST_4GPRS(2, r1) - kuap_restore_amr r4 - - ld r4,GPR4(r1) - ld r1,GPR1(r1) + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) RFI_TO_KERNEL b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_BOOK3E */ - - /* - * We are returning to a context with interrupts soft disabled. - * - * However, we may also about to hard enable, so we need to - * make sure that in this case, we also clear PACA_IRQ_HARD_DIS - * or that bit can get out of sync and bad things will happen - */ -.Lrestore_irq_off: - ld r3,_MSR(r1) - lbz r7,PACAIRQHAPPENED(r13) - andi. r0,r3,MSR_EE - beq 1f - rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS - stb r7,PACAIRQHAPPENED(r13) -1: -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - /* The interrupt should not have soft enabled. */ - lbz r7,PACAIRQSOFTMASK(r13) -1: tdeqi r7,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - b .Ldo_restore - - /* - * Something did happen, check if a re-emit is needed - * (this also clears paca->irq_happened) - */ -.Lrestore_check_irq_replay: - /* XXX: We could implement a fast path here where we check - * for irq_happened being just 0x01, in which case we can - * clear it and return. 
That means that we would potentially - * miss a decrementer having wrapped all the way around. - * - * Still, this might be useful for things like hash_page - */ - bl __check_irq_replay - cmpwi cr0,r3,0 - beq .Lrestore_no_replay - - /* - * We need to re-emit an interrupt. We do so by re-using our - * existing exception frame. We first change the trap value, - * but we need to ensure we preserve the low nibble of it - */ - ld r4,_TRAP(r1) - clrldi r4,r4,60 - or r4,r4,r3 - std r4,_TRAP(r1) - - /* - * PACA_IRQ_HARD_DIS won't always be set here, so set it now - * to reconcile the IRQ state. Tracing is already accounted for. - */ - lbz r4,PACAIRQHAPPENED(r13) - ori r4,r4,PACA_IRQ_HARD_DIS - stb r4,PACAIRQHAPPENED(r13) - - /* - * Then find the right handler and call it. Interrupts are - * still soft-disabled and we keep them that way. - */ - cmpwi cr0,r3,0x500 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl do_IRQ - b ret_from_except -1: cmpwi cr0,r3,0xf00 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl performance_monitor_exception - b ret_from_except -1: cmpwi cr0,r3,0xe60 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl handle_hmi_exception - b ret_from_except -1: cmpwi cr0,r3,0x900 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl timer_interrupt - b ret_from_except -#ifdef CONFIG_PPC_DOORBELL -1: -#ifdef CONFIG_PPC_BOOK3E - cmpwi cr0,r3,0x280 -#else - cmpwi cr0,r3,0xa00 -#endif /* CONFIG_PPC_BOOK3E */ - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl doorbell_exception -#endif /* CONFIG_PPC_DOORBELL */ -1: b ret_from_except /* What else to do here ? */ - -.Lunrecov_restore: - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b .Lunrecov_restore - -_ASM_NOKPROBE_SYMBOL(ret_from_except); -_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); -_ASM_NOKPROBE_SYMBOL(resume_kernel); -_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq); -_ASM_NOKPROBE_SYMBOL(restore); -_ASM_NOKPROBE_SYMBOL(fast_exception_return); +1: /* + * Emulate stack store with update. 
New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * PACA_EXGEN as temporary storage to hold the store data, as + * interrupts are disabled here so it won't be clobbered. + */ + mtcr r6 + std r9,PACA_EXGEN+0(r13) + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + std r9,0(r1) /* perform store component of stdu */ + ld r9,PACA_EXGEN+0(r13) + RFI_TO_KERNEL + b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_RTAS /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4efac5490216..d9ed79415100 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -24,6 +24,7 @@ #include #include #include +#include /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... @@ -1041,17 +1042,161 @@ alignment_more: bl alignment_exception b ret_from_except -/* - * We branch here from entry_64.S for the last stage of the exception - * return code path. MSR:EE is expected to be off at that point - */ -_GLOBAL(exception_return_book3e) - b 1f + .align 7 +_GLOBAL(ret_from_except) + ld r11,_TRAP(r1) + andi. r0,r11,1 + bne ret_from_except_lite + REST_NVGPRS(r1) + +_GLOBAL(ret_from_except_lite) + /* + * Disable interrupts so that current_thread_info()->flags + * can't change between when we test it and when we return + * from the interrupt. + */ + wrteei 0 + + ld r9, PACA_THREAD_INFO(r13) + ld r3,_MSR(r1) + ld r10,PACACURRENT(r13) + ld r4,TI_FLAGS(r9) + andi. r3,r3,MSR_PR + beq resume_kernel + lwz r3,(THREAD+THREAD_DBCR0)(r10) + + /* Check current_thread_info()->flags */ + andi. 
r0,r4,_TIF_USER_WORK_MASK + bne 1f + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + andis. r0,r3,DBCR0_IDM@h + beq restore + mfmsr r0 + rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ + mtmsr r0 + mtspr SPRN_DBCR0,r3 + li r10, -1 + mtspr SPRN_DBSR,r10 + b restore +1: andi. r0,r4,_TIF_NEED_RESCHED + beq 2f + bl restore_interrupts + SCHEDULE_USER + b ret_from_except_lite +2: + bl save_nvgprs + /* + * Use a non volatile GPR to save and restore our thread_info flags + * across the call to restore_interrupts. + */ + mr r30,r4 + bl restore_interrupts + mr r4,r30 + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_notify_resume + b ret_from_except + +resume_kernel: + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ + andis. r8,r4,_TIF_EMULATE_STACK_STORE@h + beq+ 1f + + addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ + + ld r3,GPR1(r1) + subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + + /* Copy from the original to the trampoline. */ + li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ + li r6,0 /* start offset: 0 */ + mtctr r5 +2: ldx r0,r6,r4 + stdx r0,r6,r3 + addi r6,r6,8 + bdnz 2b + + /* Do real store operation to complete stdu */ + ld r5,GPR1(r1) + std r8,0(r5) + + /* Clear _TIF_EMULATE_STACK_STORE flag */ + lis r11,_TIF_EMULATE_STACK_STORE@h + addi r5,r9,TI_FLAGS +0: ldarx r4,0,r5 + andc r4,r4,r11 + stdcx. r4,0,r5 + bne- 0b +1: + +#ifdef CONFIG_PREEMPT + /* Check if we need to preempt */ + andi. r0,r4,_TIF_NEED_RESCHED + beq+ restore + /* Check that preempt_count() == 0 and interrupts are enabled */ + lwz r8,TI_PREEMPT(r9) + cmpwi cr0,r8,0 + bne restore + ld r0,SOFTE(r1) + andi. r0,r0,IRQS_DISABLED + bne restore + + /* + * Here we are preempting the current task. We want to make + * sure we are soft-disabled first and reconcile irq state. 
+ */ + RECONCILE_IRQ_STATE(r3,r4) + bl preempt_schedule_irq + + /* + * arch_local_irq_restore() from preempt_schedule_irq above may + * enable hard interrupt but we really should disable interrupts + * when we return from the interrupt, and so that we don't get + * interrupted after loading SRR0/1. + */ + wrteei 0 +#endif /* CONFIG_PREEMPT */ + +restore: + /* + * This is the main kernel exit path. First we check if we + * are about to re-enable interrupts + */ + ld r5,SOFTE(r1) + lbz r6,PACAIRQSOFTMASK(r13) + andi. r5,r5,IRQS_DISABLED + bne .Lrestore_irq_off + + /* We are enabling, were we already enabled ? Yes, just return */ + andi. r6,r6,IRQS_DISABLED + beq cr0,fast_exception_return + + /* + * We are about to soft-enable interrupts (we are hard disabled + * at this point). We check if there's anything that needs to + * be replayed first. + */ + lbz r0,PACAIRQHAPPENED(r13) + cmpwi cr0,r0,0 + bne- .Lrestore_check_irq_replay + + /* + * Get here when nothing happened while soft-disabled, just + * soft-enable and move-on. We will hard-enable as a side + * effect of rfi + */ +.Lrestore_no_replay: + TRACE_ENABLE_INTS + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13); /* This is the return from load_up_fpu fast path which could do with * less GPR restores in fact, but for now we have a single return path */ - .globl fast_exception_return fast_exception_return: wrteei 0 1: mr r0,r13 @@ -1092,6 +1237,102 @@ fast_exception_return: mfspr r13,SPRN_SPRG_GEN_SCRATCH rfi + /* + * We are returning to a context with interrupts soft disabled. + * + * However, we may also about to hard enable, so we need to + * make sure that in this case, we also clear PACA_IRQ_HARD_DIS + * or that bit can get out of sync and bad things will happen + */ +.Lrestore_irq_off: + ld r3,_MSR(r1) + lbz r7,PACAIRQHAPPENED(r13) + andi. 
r0,r3,MSR_EE + beq 1f + rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS + stb r7,PACAIRQHAPPENED(r13) +1: +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) + /* The interrupt should not have soft enabled. */ + lbz r7,PACAIRQSOFTMASK(r13) +1: tdeqi r7,IRQS_ENABLED + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING +#endif + b fast_exception_return + + /* + * Something did happen, check if a re-emit is needed + * (this also clears paca->irq_happened) + */ +.Lrestore_check_irq_replay: + /* XXX: We could implement a fast path here where we check + * for irq_happened being just 0x01, in which case we can + * clear it and return. That means that we would potentially + * miss a decrementer having wrapped all the way around. + * + * Still, this might be useful for things like hash_page + */ + bl __check_irq_replay + cmpwi cr0,r3,0 + beq .Lrestore_no_replay + + /* + * We need to re-emit an interrupt. We do so by re-using our + * existing exception frame. We first change the trap value, + * but we need to ensure we preserve the low nibble of it + */ + ld r4,_TRAP(r1) + clrldi r4,r4,60 + or r4,r4,r3 + std r4,_TRAP(r1) + + /* + * PACA_IRQ_HARD_DIS won't always be set here, so set it now + * to reconcile the IRQ state. Tracing is already accounted for. + */ + lbz r4,PACAIRQHAPPENED(r13) + ori r4,r4,PACA_IRQ_HARD_DIS + stb r4,PACAIRQHAPPENED(r13) + + /* + * Then find the right handler and call it. Interrupts are + * still soft-disabled and we keep them that way. 
+ */ + cmpwi cr0,r3,0x500 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl do_IRQ + b ret_from_except +1: cmpwi cr0,r3,0xf00 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl performance_monitor_exception + b ret_from_except +1: cmpwi cr0,r3,0xe60 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl handle_hmi_exception + b ret_from_except +1: cmpwi cr0,r3,0x900 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl timer_interrupt + b ret_from_except +#ifdef CONFIG_PPC_DOORBELL +1: + cmpwi cr0,r3,0x280 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl doorbell_exception +#endif /* CONFIG_PPC_DOORBELL */ +1: b ret_from_except /* What else to do here ? */ + +_ASM_NOKPROBE_SYMBOL(ret_from_except); +_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); +_ASM_NOKPROBE_SYMBOL(resume_kernel); +_ASM_NOKPROBE_SYMBOL(restore); +_ASM_NOKPROBE_SYMBOL(fast_exception_return); + /* * Trampolines used when spotting a bad kernel stack pointer in * the exception entry code. diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d6536a7c2a01..11244031adc5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -589,6 +589,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r10,GPR12(r1) std r11,GPR13(r1) + SAVE_NVGPRS(r1) + .if IDAR .if IISIDE ld r10,_NIP(r1) @@ -625,7 +627,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mfspr r11,SPRN_XER /* save XER in stackframe */ std r10,SOFTE(r1) std r11,_XER(r1) - li r9,(IVEC)+1 + li r9,IVEC std r9,_TRAP(r1) /* set trap number */ li r10,0 ld r11,exception_marker@toc(r2) @@ -932,7 +934,6 @@ EXC_COMMON_BEGIN(system_reset_common) ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - bl save_nvgprs /* * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does * the right thing. 
We do not want to reconcile because that goes @@ -1115,7 +1116,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ @@ -1208,10 +1208,9 @@ EXC_COMMON_BEGIN(machine_check_common) /* Enable MSR_RI when finished with PACA_EXMC */ li r10,MSR_RI mtmsrd r10,1 - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception - b ret_from_except + b interrupt_return GEN_KVM machine_check @@ -1378,20 +1377,19 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_exception_return + b fast_interrupt_return 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) ld r4,_DAR(r1) ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b ret_from_except + b interrupt_return GEN_KVM data_access_slb @@ -1471,20 +1469,19 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_exception_return + b fast_interrupt_return 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) ld r4,_DAR(r1) ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b ret_from_except + b interrupt_return GEN_KVM instruction_access_slb @@ -1532,7 +1529,7 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b ret_from_except_lite + b interrupt_return_lite GEN_KVM hardware_interrupt @@ -1558,10 +1555,9 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) EXC_VIRT_END(alignment, 0x4600, 0x100) EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception - b ret_from_except + b 
interrupt_return GEN_KVM alignment @@ -1622,10 +1618,9 @@ EXC_COMMON_BEGIN(program_check_common) __ISTACK(program_check)=1 __GEN_COMMON_BODY program_check 3: - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception - b ret_from_except + b interrupt_return GEN_KVM program_check @@ -1656,7 +1651,6 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception @@ -1673,14 +1667,13 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif bl load_up_fpu - b fast_exception_return + b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm - b ret_from_except + b interrupt_return #endif GEN_KVM fp_unavailable @@ -1723,7 +1716,7 @@ EXC_COMMON_BEGIN(decrementer_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt - b ret_from_except_lite + b interrupt_return_lite GEN_KVM decrementer @@ -1814,7 +1807,7 @@ EXC_COMMON_BEGIN(doorbell_super_common) #else bl unknown_exception #endif - b ret_from_except_lite + b interrupt_return_lite GEN_KVM doorbell_super @@ -1986,10 +1979,9 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) EXC_VIRT_END(single_step, 0x4d00, 0x100) EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl single_step_exception - b ret_from_except + b interrupt_return GEN_KVM single_step @@ -2024,7 +2016,6 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION ld r4,_DAR(r1) @@ -2033,7 +2024,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl unknown_exception 
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) - b ret_from_except + b interrupt_return GEN_KVM h_data_storage @@ -2058,10 +2049,9 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b ret_from_except + b interrupt_return GEN_KVM h_instr_storage @@ -2084,10 +2074,9 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl emulation_assist_interrupt - b ret_from_except + b interrupt_return GEN_KVM emulation_assist @@ -2169,10 +2158,9 @@ EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception FINISH_NAP RUNLATCH_ON - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception - b ret_from_except + b interrupt_return GEN_KVM hmi_exception @@ -2206,7 +2194,7 @@ EXC_COMMON_BEGIN(h_doorbell_common) #else bl unknown_exception #endif - b ret_from_except_lite + b interrupt_return_lite GEN_KVM h_doorbell @@ -2236,7 +2224,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b ret_from_except_lite + b interrupt_return_lite GEN_KVM h_virt_irq @@ -2283,7 +2271,7 @@ EXC_COMMON_BEGIN(performance_monitor_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b ret_from_except_lite + b interrupt_return_lite GEN_KVM performance_monitor @@ -2323,23 +2311,21 @@ BEGIN_FTR_SECTION END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif bl load_up_altivec - b fast_exception_return + b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm - b ret_from_except + b interrupt_return #endif 1: 
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b ret_from_except + b interrupt_return GEN_KVM altivec_unavailable @@ -2381,20 +2367,18 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm - b ret_from_except + b interrupt_return #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception - b ret_from_except + b interrupt_return GEN_KVM vsx_unavailable @@ -2421,10 +2405,9 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception - b ret_from_except + b interrupt_return GEN_KVM facility_unavailable @@ -2451,10 +2434,9 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception - b ret_from_except + b interrupt_return GEN_KVM h_facility_unavailable @@ -2485,10 +2467,9 @@ EXC_REAL_END(cbe_system_error, 0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_system_error_exception - b ret_from_except + b interrupt_return GEN_KVM cbe_system_error @@ -2514,10 +2495,9 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint - bl save_nvgprs addi 
r3,r1,STACK_FRAME_OVERHEAD bl instruction_breakpoint_exception - b ret_from_except + b interrupt_return GEN_KVM instruction_breakpoint @@ -2637,10 +2617,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b ret_from_except + b interrupt_return GEN_KVM denorm_exception @@ -2659,10 +2638,9 @@ EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_maintenance_exception - b ret_from_except + b interrupt_return GEN_KVM cbe_maintenance @@ -2687,14 +2665,13 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) EXC_VIRT_END(altivec_assist, 0x5700, 0x100) EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC bl altivec_assist_exception #else bl unknown_exception #endif - b ret_from_except + b interrupt_return GEN_KVM altivec_assist @@ -2713,10 +2690,9 @@ EXC_REAL_END(cbe_thermal, 0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_thermal_exception - b ret_from_except + b interrupt_return GEN_KVM cbe_thermal @@ -2749,7 +2725,6 @@ EXC_COMMON_BEGIN(soft_nmi_common) ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - bl save_nvgprs /* * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see @@ -3082,7 +3057,7 @@ do_hash_page: cmpdi r3,0 /* see if __hash_page succeeded */ /* Success */ - beq fast_exc_return_irq /* Return from exception on success */ + beq interrupt_return_lite /* Return from exception on success */ /* Error */ blt- 13f @@ -3099,17 +3074,15 @@ handle_page_fault: addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpdi r3,0 - beq+ ret_from_except_lite - bl save_nvgprs + beq+ 
interrupt_return_lite mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) bl bad_page_fault - b ret_from_except + b interrupt_return /* We have a data breakpoint exception - handle it */ handle_dabr_fault: - bl save_nvgprs ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD @@ -3117,21 +3090,20 @@ handle_dabr_fault: /* * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. Don't use - * ret_from_except_lite here. + * interrupt_return_lite here. */ - b ret_from_except + b interrupt_return #ifdef CONFIG_PPC_BOOK3S_64 /* We have a page fault that hash_page could handle but HV refused * the PTE insertion */ -13: bl save_nvgprs - mr r5,r3 +13: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) bl low_hash_fault - b ret_from_except + b interrupt_return #endif /* @@ -3141,11 +3113,10 @@ handle_dabr_fault: * were soft-disabled. We want to invoke the exception handler for * the access, or panic if there isn't a handler. */ -77: bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD +77: addi r3,r1,STACK_FRAME_OVERHEAD li r5,SIGSEGV bl bad_page_fault - b ret_from_except + b interrupt_return /* * When doorbell is triggered from system reset wakeup, the message is diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2e5dca87b936..a25ed47087ee 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -110,6 +110,8 @@ static inline notrace int decrementer_check_overflow(void) return now >= *next_tb; } +#ifdef CONFIG_PPC_BOOK3E + /* This is called whenever we are re-enabling interrupts * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if * there's an EE, DEC or DBELL to generate. @@ -169,41 +171,16 @@ notrace unsigned int __check_irq_replay(void) } } - /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. 
- */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); - } - - /* - * Check if an hypervisor Maintenance interrupt happened. - * This is a higher priority interrupt than the others, so - * replay it first. - */ - if (happened & PACA_IRQ_HMI) { - local_paca->irq_happened &= ~PACA_IRQ_HMI; - return 0xe60; - } - if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; return 0x900; } - if (happened & PACA_IRQ_PMI) { - local_paca->irq_happened &= ~PACA_IRQ_PMI; - return 0xf00; - } - if (happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; return 0x500; } -#ifdef CONFIG_PPC_BOOK3E /* * Check if an EPR external interrupt happened this bit is typically * set if we need to handle another "edge" interrupt from within the @@ -218,20 +195,15 @@ notrace unsigned int __check_irq_replay(void) local_paca->irq_happened &= ~PACA_IRQ_DBELL; return 0x280; } -#else - if (happened & PACA_IRQ_DBELL) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - return 0xa00; - } -#endif /* CONFIG_PPC_BOOK3E */ /* There should be nothing left ! 
*/ BUG_ON(local_paca->irq_happened != 0); return 0; } +#endif /* CONFIG_PPC_BOOK3E */ -static void replay_soft_interrupts(void) +void replay_soft_interrupts(void) { /* * We use local_paca rather than get_paca() to avoid all diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 009833f928bf..9c21288f8645 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -236,23 +236,9 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); - -static int restore_fp(struct task_struct *tsk) -{ - if (tsk->thread.load_fp) { - load_fp_state(&current->thread.fp_state); - current->thread.load_fp++; - return 1; - } - return 0; -} -#else -static int restore_fp(struct task_struct *tsk) { return 0; } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC -#define loadvec(thr) ((thr).load_vec) - static void __giveup_altivec(struct task_struct *tsk) { unsigned long msr; @@ -318,21 +304,6 @@ void flush_altivec_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_altivec_to_thread); - -static int restore_altivec(struct task_struct *tsk) -{ - if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { - load_vr_state(&tsk->thread.vr_state); - tsk->thread.used_vr = 1; - tsk->thread.load_vec++; - - return 1; - } - return 0; -} -#else -#define loadvec(thr) 0 -static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -400,18 +371,6 @@ void flush_vsx_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_vsx_to_thread); - -static int restore_vsx(struct task_struct *tsk) -{ - if (cpu_has_feature(CPU_FTR_VSX)) { - tsk->thread.used_vsr = 1; - return 1; - } - - return 0; -} -#else -static inline int restore_vsx(struct task_struct *tsk) { return 0; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -511,6 +470,53 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); +#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_FPU +static int restore_fp(struct
task_struct *tsk) +{ + if (tsk->thread.load_fp) { + load_fp_state(&current->thread.fp_state); + current->thread.load_fp++; + return 1; + } + return 0; +} +#else +static int restore_fp(struct task_struct *tsk) { return 0; } +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +#define loadvec(thr) ((thr).load_vec) +static int restore_altivec(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { + load_vr_state(&tsk->thread.vr_state); + tsk->thread.used_vr = 1; + tsk->thread.load_vec++; + + return 1; + } + return 0; +} +#else +#define loadvec(thr) 0 +static inline int restore_altivec(struct task_struct *tsk) { return 0; } +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static int restore_vsx(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_VSX)) { + tsk->thread.used_vsr = 1; + return 1; + } + + return 0; +} +#else +static inline int restore_vsx(struct task_struct *tsk) { return 0; } +#endif /* CONFIG_VSX */ + /* * The exception exit path calls restore_math() with interrupts hard disabled * but the soft irq state not "reconciled". ftrace code that calls @@ -551,6 +557,7 @@ void notrace restore_math(struct pt_regs *regs) regs->msr = msr; } +#endif static void save_all(struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 75be20fdb270..a986effee1e0 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -24,7 +24,11 @@ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, unsigned long ti_flags; syscall_fn f; + if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(!FULL_REGS(regs)); + BUG_ON(regs->softe != IRQS_ENABLED); account_cpu_user_entry(); @@ -196,7 +200,7 @@ again: trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; local_irq_enable(); - /* Took an interrupt which may have more exit work to do.
*/ + /* Took an interrupt, may have more exit work to do. */ goto again; } local_paca->irq_happened = 0; @@ -212,3 +216,168 @@ again: return ret; } + +#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) +{ +#ifdef CONFIG_PPC_BOOK3E + struct thread_struct *ts = &current->thread; +#endif + unsigned long *ti_flagsp = &current_thread_info()->flags; + unsigned long ti_flags; + unsigned long flags; + unsigned long ret = 0; + + if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(!FULL_REGS(regs)); + BUG_ON(regs->softe != IRQS_ENABLED); + + local_irq_save(flags); + +again: + ti_flags = READ_ONCE(*ti_flagsp); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); /* returning to user: may enable */ + if (ti_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + if (ti_flags & _TIF_SIGPENDING) + ret |= _TIF_RESTOREALL; + do_notify_resume(regs, ti_flags); + } + local_irq_disable(); + ti_flags = READ_ONCE(*ti_flagsp); + } + + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely((ti_flags & _TIF_RESTORE_TM))) { + restore_tm_state(regs); + } else { + unsigned long mathflags = MSR_FP; + + if (cpu_has_feature(CPU_FTR_VSX)) + mathflags |= MSR_VEC | MSR_VSX; + else if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mathflags |= MSR_VEC; + + if ((regs->msr & mathflags) != mathflags) + restore_math(regs); + } + } + + trace_hardirqs_on(); + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending())) { + __hard_RI_enable(); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + local_irq_enable(); + local_irq_disable(); + /* Took an interrupt, may have more exit work to do.
*/ + goto again; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + +#ifdef CONFIG_PPC_BOOK3E + if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + mtmsr(mfmsr() & ~MSR_DE); + mtspr(SPRN_DBCR0, ts->debug.dbcr0); + mtspr(SPRN_DBSR, -1); + } +#endif + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + kuap_check_amr(); + + account_cpu_user_exit(); + + return ret; +} + +void unrecoverable_exception(struct pt_regs *regs); +void preempt_schedule_irq(void); + +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) +{ + unsigned long *ti_flagsp = &current_thread_info()->flags; + unsigned long flags; + unsigned long ret = 0; + + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) + unrecoverable_exception(regs); + BUG_ON(regs->msr & MSR_PR); + BUG_ON(!FULL_REGS(regs)); + + if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); + ret = 1; + } + + local_irq_save(flags); + + if (regs->softe == IRQS_ENABLED) { + /* Returning to a kernel context with local irqs enabled. */ + WARN_ON_ONCE(!(regs->msr & MSR_EE)); +again: + if (IS_ENABLED(CONFIG_PREEMPT)) { + /* Return to preemptible kernel context */ + if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) { + if (preempt_count() == 0) + preempt_schedule_irq(); + } + } + + trace_hardirqs_on(); + __hard_EE_RI_disable(); + if (unlikely(lazy_irq_pending())) { + __hard_RI_enable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* + * Can't local_irq_restore to replay if we were in + * interrupt context. Must replay directly.
+ */ + if (irqs_disabled_flags(flags)) { + replay_soft_interrupts(); + } else { + local_irq_restore(flags); + local_irq_save(flags); + } + /* Took an interrupt, may have more exit work to do. */ + goto again; + } + local_paca->irq_happened = 0; + irq_soft_mask_set(IRQS_ENABLED); + } else { + /* Returning to a kernel context with local irqs disabled. */ + __hard_EE_RI_disable(); + if (regs->msr & MSR_EE) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + } + + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + kuap_restore_amr(regs); + + return ret; +} +#endif diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 25c14a0981bf..d20c5e79e03c 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -134,7 +134,7 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) - b fast_exception_return + b fast_interrupt_return #endif /* CONFIG_VSX */ -- cgit v1.2.3-59-g8ed1b From 702f0980522239bc7fd1360b24f722a90b6b4418 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:38 +1000 Subject: powerpc/64s/exception: Remove lite interrupt return Regular interrupt return restores NVGPRS whereas lite returns do not. This is clumsy: most interrupts can return without restoring NVGPRS in most of the time, but there are special cases that require it (when registers have been modified by the kernel). So change interrupt return to not restore NVGPRS, and have interrupt handlers restore them explicitly in the cases that requires it. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-30-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 6 ------ arch/powerpc/kernel/exceptions-64s.S | 24 ++++++++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5d782acb86d4..95905ed3d86e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -479,12 +479,6 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) .globl interrupt_return interrupt_return: _ASM_NOKPROBE_SYMBOL(interrupt_return) - REST_NVGPRS(r1) - - .balign IFETCH_ALIGN_BYTES - .globl interrupt_return_lite -interrupt_return_lite: -_ASM_NOKPROBE_SYMBOL(interrupt_return_lite) ld r4,_MSR(r1) andi. r0,r4,MSR_PR beq .Lkernel_interrupt_return diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 11244031adc5..18bbce143084 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1529,7 +1529,7 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return_lite + b interrupt_return GEN_KVM hardware_interrupt @@ -1557,6 +1557,7 @@ EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return GEN_KVM alignment @@ -1620,6 +1621,7 @@ EXC_COMMON_BEGIN(program_check_common) 3: addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return GEN_KVM program_check @@ -1716,7 +1718,7 @@ EXC_COMMON_BEGIN(decrementer_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt - b interrupt_return_lite + b interrupt_return GEN_KVM decrementer @@ -1807,7 +1809,7 @@ EXC_COMMON_BEGIN(doorbell_super_common) #else bl unknown_exception 
#endif - b interrupt_return_lite + b interrupt_return GEN_KVM doorbell_super @@ -2076,6 +2078,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist addi r3,r1,STACK_FRAME_OVERHEAD bl emulation_assist_interrupt + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ b interrupt_return GEN_KVM emulation_assist @@ -2194,7 +2197,7 @@ EXC_COMMON_BEGIN(h_doorbell_common) #else bl unknown_exception #endif - b interrupt_return_lite + b interrupt_return GEN_KVM h_doorbell @@ -2224,7 +2227,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return_lite + b interrupt_return GEN_KVM h_virt_irq @@ -2271,7 +2274,7 @@ EXC_COMMON_BEGIN(performance_monitor_common) RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b interrupt_return_lite + b interrupt_return GEN_KVM performance_monitor @@ -2668,6 +2671,7 @@ EXC_COMMON_BEGIN(altivec_assist_common) addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC bl altivec_assist_exception + REST_NVGPRS(r1) /* instruction emulation may change GPRs */ #else bl unknown_exception #endif @@ -3057,7 +3061,7 @@ do_hash_page: cmpdi r3,0 /* see if __hash_page succeeded */ /* Success */ - beq interrupt_return_lite /* Return from exception on success */ + beq interrupt_return /* Return from exception on success */ /* Error */ blt- 13f @@ -3074,7 +3078,7 @@ handle_page_fault: addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpdi r3,0 - beq+ interrupt_return_lite + beq+ interrupt_return mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) @@ -3089,9 +3093,9 @@ handle_dabr_fault: bl do_break /* * do_break() may have changed the NV GPRS while handling a breakpoint. - * If so, we need to restore them with their updated values. Don't use - * interrupt_return_lite here. + * If so, we need to restore them with their updated values. 
*/ + REST_NVGPRS(r1) b interrupt_return -- cgit v1.2.3-59-g8ed1b From 5f0b6ac3905fc961f3b685a08eb4962ff071ea7d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Feb 2020 03:35:39 +1000 Subject: powerpc/64/syscall: Reconcile interrupts This reconciles interrupts in the system call case like all other interrupts. This allows system_call_common to be shared with the scv system call implementation in a subsequent patch. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200225173541.1549955-31-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 11 +++++++++++ arch/powerpc/kernel/syscall_64.c | 24 ++++++++++-------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 95905ed3d86e..63f0a4414618 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -119,6 +119,17 @@ END_BTB_FLUSH_SECTION ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ + /* + * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which + * would clobber syscall parameters. Also we always enter with IRQs + * enabled and nothing pending. system_call_exception() will call + * trace_hardirqs_off(). 
+ */ + li r11,IRQS_ALL_DISABLED + li r12,PACA_IRQ_HARD_DIS + stb r11,PACAIRQSOFTMASK(r13) + stb r12,PACAIRQHAPPENED(r13) + /* Calling convention has r9 = orig r0, r10 = regs */ mr r9,r0 bl system_call_exception diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index a986effee1e0..cf06eb443a80 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -17,13 +17,19 @@ typedef long (*syscall_fn)(long, long, long, long, long, long); -/* Has to run notrace because it is entered "unreconciled" */ -notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, - unsigned long r0, struct pt_regs *regs) +/* Has to run notrace because it is entered not completely "reconciled" */ +notrace long system_call_exception(long r3, long r4, long r5, + long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs) { unsigned long ti_flags; syscall_fn f; + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + + trace_hardirqs_off(); /* finish reconciling */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); @@ -44,16 +50,6 @@ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, kuap_check_amr(); - /* - * A syscall should always be called with interrupts enabled - * so we just unconditionally hard-enable here. When some kind - * of irq tracing is used, we additionally check that condition - * is correct - */ - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - WARN_ON(irq_soft_mask_return() != IRQS_ENABLED); - WARN_ON(local_paca->irq_happened); - } /* * This is not required for the syscall exit path, but makes the * stack frame look nicer. 
If this was initialised in the first stack @@ -62,7 +58,7 @@ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, */ regs->softe = IRQS_ENABLED; - __hard_irq_enable(); + local_irq_enable(); ti_flags = current_thread_info()->flags; if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { -- cgit v1.2.3-59-g8ed1b From da9a1c10e2c7311e923210b6ccd9fbd1ac9132df Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:37 +0000 Subject: powerpc: Move ptrace into a subdirectory. In order to allow splitting of ptrace depending on the different CONFIG_ options, create a subdirectory dedicated to ptrace and move ptrace.c and ptrace32.c into it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9ebcbe37834e9d447dd97f4381084795a673260c.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/Makefile | 8 +- arch/powerpc/kernel/ptrace.c | 3468 --------------------------------- arch/powerpc/kernel/ptrace/Makefile | 9 + arch/powerpc/kernel/ptrace/ptrace.c | 3468 +++++++++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace32.c | 318 +++ arch/powerpc/kernel/ptrace32.c | 318 --- 6 files changed, 3798 insertions(+), 3791 deletions(-) delete mode 100644 arch/powerpc/kernel/ptrace.c create mode 100644 arch/powerpc/kernel/ptrace/Makefile create mode 100644 arch/powerpc/kernel/ptrace/ptrace.c create mode 100644 arch/powerpc/kernel/ptrace/ptrace32.c delete mode 100644 arch/powerpc/kernel/ptrace32.c diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 5700231a8988..570660efbb3d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. 
# -CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' - ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif @@ -41,15 +39,15 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif -obj-y := cputable.o ptrace.o syscalls.o \ +obj-y := cputable.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o -obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ - signal_64.o ptrace32.o \ +obj-y += ptrace/ +obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \ paca.o nvram_64.o firmware.o note.o \ syscall_64.o obj-$(CONFIG_VDSO32) += vdso32/ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c deleted file mode 100644 index 25c0424e8868..000000000000 --- a/arch/powerpc/kernel/ptrace.c +++ /dev/null @@ -1,3468 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Derived from "arch/m68k/kernel/ptrace.c" - * Copyright (C) 1994 by Hamish Macdonald - * Taken from linux/kernel/ptrace.c and modified for M680x0. - * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds - * - * Modified by Cort Dougan (cort@hq.fsmlabs.com) - * and Paul Mackerras (paulus@samba.org). - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -/* - * The parameter save area on the stack is used to store arguments being passed - * to callee function and is located at fixed offset from stack pointer. - */ -#ifdef CONFIG_PPC32 -#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ -#else /* CONFIG_PPC32 */ -#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ -#endif - -struct pt_regs_offset { - const char *name; - int offset; -}; - -#define STR(s) #s /* convert to string */ -#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} -#define GPR_OFFSET_NAME(num) \ - {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ - {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} -#define REG_OFFSET_END {.name = NULL, .offset = 0} - -#define TVSO(f) (offsetof(struct thread_vr_state, f)) -#define TFSO(f) (offsetof(struct thread_fp_state, f)) -#define TSO(f) (offsetof(struct thread_struct, f)) - -static const struct pt_regs_offset regoffset_table[] = { - GPR_OFFSET_NAME(0), - GPR_OFFSET_NAME(1), - GPR_OFFSET_NAME(2), - GPR_OFFSET_NAME(3), - GPR_OFFSET_NAME(4), - GPR_OFFSET_NAME(5), - GPR_OFFSET_NAME(6), - GPR_OFFSET_NAME(7), - GPR_OFFSET_NAME(8), - GPR_OFFSET_NAME(9), - GPR_OFFSET_NAME(10), - GPR_OFFSET_NAME(11), - GPR_OFFSET_NAME(12), - GPR_OFFSET_NAME(13), - GPR_OFFSET_NAME(14), - GPR_OFFSET_NAME(15), - GPR_OFFSET_NAME(16), - GPR_OFFSET_NAME(17), - GPR_OFFSET_NAME(18), - GPR_OFFSET_NAME(19), - GPR_OFFSET_NAME(20), - GPR_OFFSET_NAME(21), - GPR_OFFSET_NAME(22), - GPR_OFFSET_NAME(23), - GPR_OFFSET_NAME(24), - GPR_OFFSET_NAME(25), - GPR_OFFSET_NAME(26), - GPR_OFFSET_NAME(27), - GPR_OFFSET_NAME(28), - GPR_OFFSET_NAME(29), - GPR_OFFSET_NAME(30), 
- GPR_OFFSET_NAME(31), - REG_OFFSET_NAME(nip), - REG_OFFSET_NAME(msr), - REG_OFFSET_NAME(ctr), - REG_OFFSET_NAME(link), - REG_OFFSET_NAME(xer), - REG_OFFSET_NAME(ccr), -#ifdef CONFIG_PPC64 - REG_OFFSET_NAME(softe), -#else - REG_OFFSET_NAME(mq), -#endif - REG_OFFSET_NAME(trap), - REG_OFFSET_NAME(dar), - REG_OFFSET_NAME(dsisr), - REG_OFFSET_END, -}; - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static void flush_tmregs_to_thread(struct task_struct *tsk) -{ - /* - * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). - * - * A reclaim flushes ALL the state or if not in TM save TM SPRs - * in the appropriate thread structures from live. - */ - - if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current)) - return; - - if (MSR_TM_SUSPENDED(mfmsr())) { - tm_reclaim_current(TM_CAUSE_SIGNAL); - } else { - tm_enable(); - tm_save_sprs(&(tsk->thread)); - } -} -#else -static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } -#endif - -/** - * regs_query_register_offset() - query register offset from its name - * @name: the name of a register - * - * regs_query_register_offset() returns the offset of a register in struct - * pt_regs from its name. If the name is invalid, this returns -EINVAL; - */ -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; -} - -/** - * regs_query_register_name() - query register name from its offset - * @offset: the offset of a register in struct pt_regs. - * - * regs_query_register_name() returns the name of a register from its - * offset in struct pt_regs. 
If the @offset is invalid, this returns NULL; - */ -const char *regs_query_register_name(unsigned int offset) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (roff->offset == offset) - return roff->name; - return NULL; -} - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Set of msr bits that gdb can change on behalf of a process. - */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -#define MSR_DEBUGCHANGE 0 -#else -#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) -#endif - -/* - * Max register writeable via put_reg - */ -#ifdef CONFIG_PPC32 -#define PT_MAX_PUT_REG PT_MQ -#else -#define PT_MAX_PUT_REG PT_CCR -#endif - -static unsigned long get_user_msr(struct task_struct *task) -{ - return task->thread.regs->msr | task->thread.fpexc_mode; -} - -static int set_user_msr(struct task_struct *task, unsigned long msr) -{ - task->thread.regs->msr &= ~MSR_DEBUGCHANGE; - task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; - return 0; -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static unsigned long get_user_ckpt_msr(struct task_struct *task) -{ - return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; -} - -static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) -{ - task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; - task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; - return 0; -} - -static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) -{ - task->thread.ckpt_regs.trap = trap & 0xfff0; - return 0; -} -#endif - -#ifdef CONFIG_PPC64 -static int get_user_dscr(struct task_struct *task, unsigned long *data) -{ - *data = task->thread.dscr; - return 0; -} - -static int set_user_dscr(struct task_struct *task, unsigned long dscr) -{ - task->thread.dscr = dscr; - task->thread.dscr_inherit = 1; - return 0; -} -#else -static int get_user_dscr(struct task_struct *task, unsigned long *data) -{ - return -EIO; -} - -static int set_user_dscr(struct 
task_struct *task, unsigned long dscr) -{ - return -EIO; -} -#endif - -/* - * We prevent mucking around with the reserved area of trap - * which are used internally by the kernel. - */ -static int set_user_trap(struct task_struct *task, unsigned long trap) -{ - task->thread.regs->trap = trap & 0xfff0; - return 0; -} - -/* - * Get contents of register REGNO in task TASK. - */ -int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) -{ - unsigned int regs_max; - - if ((task->thread.regs == NULL) || !data) - return -EIO; - - if (regno == PT_MSR) { - *data = get_user_msr(task); - return 0; - } - - if (regno == PT_DSCR) - return get_user_dscr(task, data); - -#ifdef CONFIG_PPC64 - /* - * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is - * no more used as a flag, lets force usr to alway see the softe value as 1 - * which means interrupts are not soft disabled. - */ - if (regno == PT_SOFTE) { - *data = 1; - return 0; - } -#endif - - regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long); - if (regno < regs_max) { - regno = array_index_nospec(regno, regs_max); - *data = ((unsigned long *)task->thread.regs)[regno]; - return 0; - } - - return -EIO; -} - -/* - * Write contents of register REGNO in task TASK. 
- */ -int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) -{ - if (task->thread.regs == NULL) - return -EIO; - - if (regno == PT_MSR) - return set_user_msr(task, data); - if (regno == PT_TRAP) - return set_user_trap(task, data); - if (regno == PT_DSCR) - return set_user_dscr(task, data); - - if (regno <= PT_MAX_PUT_REG) { - regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1); - ((unsigned long *)task->thread.regs)[regno] = data; - return 0; - } - return -EIO; -} - -static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int i, ret; - - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_msr(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; -} - -static int gpr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - unsigned long reg; - int ret; - - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 
- target->thread.regs, - 0, PT_MSR * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_MSR * sizeof(reg), - (PT_MSR + 1) * sizeof(reg)); - if (!ret) - ret = set_user_msr(target, reg); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - PT_ORIG_R3 * sizeof(reg), - (PT_MAX_PUT_REG + 1) * sizeof(reg)); - - if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_TRAP * sizeof(reg), - (PT_TRAP + 1) * sizeof(reg)); - if (!ret) - ret = set_user_trap(target, reg); - } - - if (!ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); - - return ret; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. 
- * - * Userspace interface buffer layout: - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - * }; - */ -static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ -#ifdef CONFIG_VSX - u64 buf[33]; - int i; - - flush_fp_to_thread(target); - - /* copy to local buffer then write that out */ - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -#else - BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32])); - - flush_fp_to_thread(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); -#endif -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. 
- * - * Userspace interface buffer layout: - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - * }; - * - */ -static int fpr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ -#ifdef CONFIG_VSX - u64 buf[33]; - int i; - - flush_fp_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; - - /* copy to local buffer then write that out */ - i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - if (i) - return i; - - for (i = 0; i < 32 ; i++) - target->thread.TS_FPR(i) = buf[i]; - target->thread.fp_state.fpscr = buf[32]; - return 0; -#else - BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32])); - - flush_fp_to_thread(target); - - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); -#endif -} - -#ifdef CONFIG_ALTIVEC -/* - * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. - * The transfer totals 34 quadword. Quadwords 0-31 contain the - * corresponding vector registers. Quadword 32 contains the vscr as the - * last word (offset 12) within that quadword. Quadword 33 contains the - * vrsave as the first word (offset 0) within the quadword. - * - * This definition of the VMX state is compatible with the current PPC32 - * ptrace interface. This allows signal handling and ptrace to use the - * same structures. This also simplifies the implementation of a bi-arch - * (combined (32- and 64-bit) gdb. - */ - -static int vr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_altivec_to_thread(target); - return target->thread.used_vr ? 
regset->n : 0; -} - -/* - * Regardless of transactions, 'vr_state' holds the current running - * value of all the VMX registers and 'ckvr_state' holds the last - * checkpointed value of all the VMX registers for the current - * transaction to fall back on in case it aborts. - * - * Userspace interface buffer layout: - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - * }; - */ -static int vr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - flush_altivec_to_thread(target); - - BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != - offsetof(struct thread_vr_state, vr[32])); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - } - - return ret; -} - -/* - * Regardless of transactions, 'vr_state' holds the current running - * value of all the VMX registers and 'ckvr_state' holds the last - * checkpointed value of all the VMX registers for the current - * transaction to fall back on in case it aborts. 
- * - * Userspace interface buffer layout: - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - * }; - */ -static int vr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - flush_altivec_to_thread(target); - - BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != - offsetof(struct thread_vr_state, vr[32])); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret && count > 0) { - /* - * We use only the first word of vrsave. - */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - if (!ret) - target->thread.vrsave = vrsave.word; - } - - return ret; -} -#endif /* CONFIG_ALTIVEC */ - -#ifdef CONFIG_VSX -/* - * Currently to set and and get all the vsx state, you need to call - * the fp and VMX calls as well. This only get/sets the lower 32 - * 128bit VSX registers. - */ - -static int vsr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_vsx_to_thread(target); - return target->thread.used_vsr ? regset->n : 0; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last - * checkpointed value of all FPR registers for the current - * transaction. 
- * - * Userspace interface buffer layout: - * - * struct data { - * u64 vsx[32]; - * }; - */ -static int vsr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last - * checkpointed value of all FPR registers for the current - * transaction. - * - * Userspace interface buffer layout: - * - * struct data { - * u64 vsx[32]; - * }; - */ -static int vsr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[32]; - int ret,i; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - if (!ret) - for (i = 0; i < 32 ; i++) - target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - - return ret; -} -#endif /* CONFIG_VSX */ - -#ifdef CONFIG_SPE - -/* - * For get_evrregs/set_evrregs functions 'data' has the following layout: - * - * struct { - * u32 evr[32]; - * u64 acc; - * u32 spefscr; - * } - */ - -static int evr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_spe_to_thread(target); - return target->thread.used_spe ? 
regset->n : 0; -} - -static int evr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - flush_spe_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); - - BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != - offsetof(struct thread_struct, spefscr)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; -} - -static int evr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - flush_spe_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); - - BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != - offsetof(struct thread_struct, spefscr)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; -} -#endif /* CONFIG_SPE */ - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/** - * tm_cgpr_active - get active number of registers in CGPR - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed GPR category. - */ -static int tm_cgpr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cgpr_get - get CGPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. 
- * @ubuf: User buffer to copy into. - * - * This function gets transaction checkpointed GPR registers. - * - * When the transaction is active, 'ckpt_regs' holds all the checkpointed - * GPR register values for the current transaction to fall back on if it - * aborts in between. This function gets those checkpointed GPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * struct pt_regs ckpt_regs; - * }; - */ -static int tm_cgpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_ckpt_msr(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; -} - -/* - * tm_cgpr_set - set the CGPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed GPR registers. 
- * - * When the transaction is active, 'ckpt_regs' holds the checkpointed - * GPR register values for the current transaction to fall back on if it - * aborts in between. This function sets those checkpointed GPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * struct pt_regs ckpt_regs; - * }; - */ -static int tm_cgpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - unsigned long reg; - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, PT_MSR * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_MSR * sizeof(reg), - (PT_MSR + 1) * sizeof(reg)); - if (!ret) - ret = set_user_ckpt_msr(target, reg); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - PT_ORIG_R3 * sizeof(reg), - (PT_MAX_PUT_REG + 1) * sizeof(reg)); - - if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, - PT_TRAP * sizeof(reg), - (PT_TRAP + 1) * sizeof(reg)); - if (!ret) - ret = set_user_ckpt_trap(target, reg); - } - - if (!ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); - - return ret; -} - -/** - * tm_cfpr_active - get active number of registers in CFPR - * @target: The target task. 
- * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed FPR category. - */ -static int tm_cfpr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cfpr_get - get CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed FPR registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * values for the current transaction to fall back on if it aborts - * in between. This function gets those checkpointed FPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - *}; - */ -static int tm_cfpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[33]; - int i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* copy to local buffer then write that out */ - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_CKFPR(i); - buf[32] = target->thread.ckfp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -} - -/** - * tm_cfpr_set - set CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. 
- * - * This function sets in transaction checkpointed FPR registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * FPR register values for the current transaction to fall back on - * if it aborts in between. This function sets these checkpointed - * FPR registers. The userspace interface buffer layout is as follows. - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - *}; - */ -static int tm_cfpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[33]; - int i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - for (i = 0; i < 32; i++) - buf[i] = target->thread.TS_CKFPR(i); - buf[32] = target->thread.ckfp_state.fpscr; - - /* copy to local buffer then write that out */ - i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - if (i) - return i; - for (i = 0; i < 32 ; i++) - target->thread.TS_CKFPR(i) = buf[i]; - target->thread.ckfp_state.fpscr = buf[32]; - return 0; -} - -/** - * tm_cvmx_active - get active number of registers in CVMX - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in checkpointed VMX category. - */ -static int tm_cvmx_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cvmx_get - get CMVX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. 
- * - * This function gets in transaction checkpointed VMX registers. - * - * When the transaction is active 'ckvr_state' and 'ckvrsave' hold - * the checkpointed values for the current transaction to fall - * back on if it aborts in between. The userspace interface buffer - * layout is as follows. - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - *}; - */ -static int tm_cvmx_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckvr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - } - - return ret; -} - -/** - * tm_cvmx_set - set CMVX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed VMX registers. - * - * When the transaction is active 'ckvr_state' and 'ckvrsave' hold - * the checkpointed values for the current transaction to fall - * back on if it aborts in between. The userspace interface buffer - * layout is as follows. 
- * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - *}; - */ -static int tm_cvmx_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckvr_state, 0, - 33 * sizeof(vector128)); - if (!ret && count > 0) { - /* - * We use only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - if (!ret) - target->thread.ckvrsave = vrsave.word; - } - - return ret; -} - -/** - * tm_cvsx_active - get active number of registers in CVSX - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed VSX category. - */ -static int tm_cvsx_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - flush_vsx_to_thread(target); - return target->thread.used_vsr ? regset->n : 0; -} - -/** - * tm_cvsx_get - get CVSX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed VSX registers. 
- * - * When the transaction is active 'ckfp_state' holds the checkpointed - * values for the current transaction to fall back on if it aborts - * in between. This function gets those checkpointed VSX registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * u64 vsx[32]; - *}; - */ -static int tm_cvsx_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; -} - -/** - * tm_cvsx_set - set CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed VSX registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * VSX register values for the current transaction to fall back on - * if it aborts in between. This function sets these checkpointed - * FPR registers. The userspace interface buffer layout is as follows. 
- * - * struct data { - * u64 vsx[32]; - *}; - */ -static int tm_cvsx_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - if (!ret) - for (i = 0; i < 32 ; i++) - target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - - return ret; -} - -/** - * tm_spr_active - get active number of registers in TM SPR - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks the active number of available - * regisers in the transactional memory SPR category. - */ -static int tm_spr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - return regset->n; -} - -/** - * tm_spr_get - get the TM related SPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets transactional memory related SPR registers. - * The userspace interface buffer layout is as follows. 
- * - * struct { - * u64 tm_tfhar; - * u64 tm_texasr; - * u64 tm_tfiar; - * }; - */ -static int tm_spr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - /* Build tests */ - BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); - BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); - BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - /* Flush the states */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* TFHAR register */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - - /* TEXASR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - - /* TFIAR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; -} - -/** - * tm_spr_set - set the TM related SPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets transactional memory related SPR registers. - * The userspace interface buffer layout is as follows. 
- * - * struct { - * u64 tm_tfhar; - * u64 tm_texasr; - * u64 tm_tfiar; - * }; - */ -static int tm_spr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - /* Build tests */ - BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); - BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); - BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - /* Flush the states */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* TFHAR register */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - - /* TEXASR register */ - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - - /* TFIAR register */ - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; -} - -static int tm_tar_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - -static int tm_tar_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; -} - -static int tm_tar_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - 
return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; -} - -static int tm_ppr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - - -static int tm_ppr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; -} - -static int tm_ppr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; -} - -static int tm_dscr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - -static int tm_dscr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; -} - -static int tm_dscr_set(struct task_struct *target, 
- const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - -#ifdef CONFIG_PPC64 -static int ppr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); -} - -static int ppr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); -} - -static int dscr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); -} -static int dscr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); -} -#endif -#ifdef CONFIG_PPC_BOOK3S_64 -static int tar_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); -} -static int tar_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - 
return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); -} - -static int ebb_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (target->thread.used_ebb) - return regset->n; - - return 0; -} - -static int ebb_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - /* Build tests */ - BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); - BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (!target->thread.used_ebb) - return -ENODATA; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbrr, 0, 3 * sizeof(unsigned long)); -} - -static int ebb_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - - /* Build tests */ - BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); - BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (target->thread.used_ebb) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbrr, 0, sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbhr, sizeof(unsigned long), - 2 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.bescr, - 2 * sizeof(unsigned long), 3 * sizeof(unsigned long)); - - return ret; -} -static int pmu_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return regset->n; -} - -static int pmu_get(struct task_struct *target, - const struct 
user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - /* Build tests */ - BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); - BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); - BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); - BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.siar, 0, - 5 * sizeof(unsigned long)); -} - -static int pmu_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - - /* Build tests */ - BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); - BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); - BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); - BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.siar, 0, - sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.sdar, sizeof(unsigned long), - 2 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.sier, 2 * sizeof(unsigned long), - 3 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.mmcr2, 3 * sizeof(unsigned long), - 4 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.mmcr0, 4 * sizeof(unsigned long), - 5 * sizeof(unsigned long)); - return ret; -} -#endif - -#ifdef CONFIG_PPC_MEM_KEYS -static int pkey_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!arch_pkeys_enabled()) - return -ENODEV; - - 
return regset->n; -} - -static int pkey_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); - BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); - - if (!arch_pkeys_enabled()) - return -ENODEV; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.amr, 0, - ELF_NPKEY * sizeof(unsigned long)); -} - -static int pkey_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 new_amr; - int ret; - - if (!arch_pkeys_enabled()) - return -ENODEV; - - /* Only the AMR can be set from userspace */ - if (pos != 0 || count != sizeof(new_amr)) - return -EINVAL; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &new_amr, 0, sizeof(new_amr)); - if (ret) - return ret; - - /* UAMOR determines which bits of the AMR can be set from userspace. */ - target->thread.amr = (new_amr & target->thread.uamor) | - (target->thread.amr & ~target->thread.uamor); - - return 0; -} -#endif /* CONFIG_PPC_MEM_KEYS */ - -/* - * These are our native regset flavors. 
- */ -enum powerpc_regset { - REGSET_GPR, - REGSET_FPR, -#ifdef CONFIG_ALTIVEC - REGSET_VMX, -#endif -#ifdef CONFIG_VSX - REGSET_VSX, -#endif -#ifdef CONFIG_SPE - REGSET_SPE, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - REGSET_TM_CGPR, /* TM checkpointed GPR registers */ - REGSET_TM_CFPR, /* TM checkpointed FPR registers */ - REGSET_TM_CVMX, /* TM checkpointed VMX registers */ - REGSET_TM_CVSX, /* TM checkpointed VSX registers */ - REGSET_TM_SPR, /* TM specific SPR registers */ - REGSET_TM_CTAR, /* TM checkpointed TAR register */ - REGSET_TM_CPPR, /* TM checkpointed PPR register */ - REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ -#endif -#ifdef CONFIG_PPC64 - REGSET_PPR, /* PPR register */ - REGSET_DSCR, /* DSCR register */ -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - REGSET_TAR, /* TAR register */ - REGSET_EBB, /* EBB registers */ - REGSET_PMR, /* Performance Monitor Registers */ -#endif -#ifdef CONFIG_PPC_MEM_KEYS - REGSET_PKEY, /* AMR register */ -#endif -}; - -static const struct user_regset native_regsets[] = { - [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .get = gpr_get, .set = gpr_set - }, - [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set - }, -#ifdef CONFIG_ALTIVEC - [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set - }, -#endif -#ifdef CONFIG_VSX - [REGSET_VSX] = { - .core_note_type = NT_PPC_VSX, .n = 32, - .size = sizeof(double), .align = sizeof(double), - .active = vsr_active, .get = vsr_get, .set = vsr_set - }, -#endif -#ifdef CONFIG_SPE - [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, - .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set - }, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - [REGSET_TM_CGPR] = { 
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set - }, - [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set - }, - [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set - }, - [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set - }, - [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set - }, - [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set - }, - [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set - }, - [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set - }, -#endif -#ifdef CONFIG_PPC64 - [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set - }, - [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set - }, -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set 
- }, - [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, - .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set - }, - [REGSET_PMR] = { - .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, - .size = sizeof(u64), .align = sizeof(u64), - .active = pmu_active, .get = pmu_get, .set = pmu_set - }, -#endif -#ifdef CONFIG_PPC_MEM_KEYS - [REGSET_PKEY] = { - .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, - .size = sizeof(u64), .align = sizeof(u64), - .active = pkey_active, .get = pkey_get, .set = pkey_set - }, -#endif -}; - -static const struct user_regset_view user_ppc_native_view = { - .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, - .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) -}; - -#ifdef CONFIG_PPC64 -#include <linux/compat.h> - -static int gpr32_get_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - unsigned long *regs) -{ - compat_ulong_t *k = kbuf; - compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_MSR; --count) - if (__put_user((compat_ulong_t) regs[pos++], u++)) - return -EFAULT; - - if (count > 0 && pos == PT_MSR) { - reg = get_user_msr(target); - if (kbuf) - *k++ = reg; - else if (__put_user(reg, u++)) - return -EFAULT; - ++pos; - --count; - } - - if (kbuf) - for (; count > 0 && pos < PT_REGS_COUNT; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_REGS_COUNT; --count) - if (__put_user((compat_ulong_t) regs[pos++], u++)) - return -EFAULT; - - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - PT_REGS_COUNT * sizeof(reg), -1); -} - -static int gpr32_set_common(struct task_struct *target, - const struct user_regset *regset, -
unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf, - unsigned long *regs) -{ - const compat_ulong_t *k = kbuf; - const compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - regs[pos++] = *k++; - else - for (; count > 0 && pos < PT_MSR; --count) { - if (__get_user(reg, u++)) - return -EFAULT; - regs[pos++] = reg; - } - - - if (count > 0 && pos == PT_MSR) { - if (kbuf) - reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; - set_user_msr(target, reg); - ++pos; - --count; - } - - if (kbuf) { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) - regs[pos++] = *k++; - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - ++k; - } else { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - if (__get_user(reg, u++)) - return -EFAULT; - regs[pos++] = reg; - } - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - if (__get_user(reg, u++)) - return -EFAULT; - } - - if (count > 0 && pos == PT_TRAP) { - if (kbuf) - reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; - set_user_trap(target, reg); - ++pos; - --count; - } - - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static int tm_cgpr32_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.ckpt_regs.gpr[0]); -} - -static int tm_cgpr32_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.ckpt_regs.gpr[0]); -} -#endif /* 
CONFIG_PPC_TRANSACTIONAL_MEM */ - -static int gpr32_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int i; - - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* - * We have a partial register set. - * Fill 14-31 with bogus values. - */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.regs->gpr[0]); -} - -static int gpr32_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.regs->gpr[0]); -} - -/* - * These are the regset flavors matching the CONFIG_PPC32 native set. 
- */ -static const struct user_regset compat_regsets[] = { - [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, - .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = gpr32_get, .set = gpr32_set - }, - [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set - }, -#ifdef CONFIG_ALTIVEC - [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set - }, -#endif -#ifdef CONFIG_SPE - [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, - .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set - }, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, - .get = tm_cgpr32_get, .set = tm_cgpr32_set - }, - [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set - }, - [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set - }, - [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set - }, - [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set - }, - [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set - }, - 
[REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set - }, - [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set - }, -#endif -#ifdef CONFIG_PPC64 - [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set - }, - [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set - }, -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set - }, - [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, - .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set - }, -#endif -}; - -static const struct user_regset_view user_ppc_compat_view = { - .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, - .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) -}; -#endif /* CONFIG_PPC64 */ - -const struct user_regset_view *task_user_regset_view(struct task_struct *task) -{ -#ifdef CONFIG_PPC64 - if (test_tsk_thread_flag(task, TIF_32BIT)) - return &user_ppc_compat_view; -#endif - return &user_ppc_native_view; -} - - -void user_enable_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.debug.dbcr0 &= ~DBCR0_BT; - task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; -#else - regs->msr &= ~MSR_BE; - regs->msr |= MSR_SE; -#endif - } - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -void user_enable_block_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs 
!= NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.debug.dbcr0 &= ~DBCR0_IC; - task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; - regs->msr |= MSR_DE; -#else - regs->msr &= ~MSR_SE; - regs->msr |= MSR_BE; -#endif - } - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -void user_disable_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - /* - * The logic to disable single stepping should be as - * simple as turning off the Instruction Complete flag. - * And, after doing so, if all debug flags are off, turn - * off DBCR0(IDM) and MSR(DE) .... Torez - */ - task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT); - /* - * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. - */ - if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, - task->thread.debug.dbcr1)) { - /* - * All debug events were off..... - */ - task->thread.debug.dbcr0 &= ~DBCR0_IDM; - regs->msr &= ~MSR_DE; - } -#else - regs->msr &= ~(MSR_SE | MSR_BE); -#endif - } - clear_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -#ifdef CONFIG_HAVE_HW_BREAKPOINT -void ptrace_triggered(struct perf_event *bp, - struct perf_sample_data *data, struct pt_regs *regs) -{ - struct perf_event_attr attr; - - /* - * Disable the breakpoint request here since ptrace has defined a - * one-shot behaviour for breakpoint exceptions in PPC64. - * The SIGTRAP signal is generated automatically for us in do_dabr(). 
- * We don't have to do anything about that here - */ - attr = bp->attr; - attr.disabled = true; - modify_user_hw_breakpoint(bp, &attr); -} -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - -static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, - unsigned long data) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int ret; - struct thread_struct *thread = &(task->thread); - struct perf_event *bp; - struct perf_event_attr attr; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - bool set_bp = true; - struct arch_hw_breakpoint hw_brk; -#endif - - /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). - * For embedded processors we support one DAC and no IAC's at the - * moment. - */ - if (addr > 0) - return -EINVAL; - - /* The bottom 3 bits in dabr are flags */ - if ((data & ~0x7UL) >= TASK_SIZE) - return -EIO; - -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. - * It was assumed, on previous implementations, that 3 bits were - * passed together with the data address, fitting the design of the - * DABR register, as follows: - * - * bit 0: Read flag - * bit 1: Write flag - * bit 2: Breakpoint translation - * - * Thus, we use them here as so. 
- */ - - /* Ensure breakpoint translation bit is set */ - if (data && !(data & HW_BRK_TYPE_TRANSLATE)) - return -EIO; - hw_brk.address = data & (~HW_BRK_TYPE_DABR); - hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - hw_brk.len = DABR_MAX_LEN; - hw_brk.hw_len = DABR_MAX_LEN; - set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); -#ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; - if (!set_bp) { - if (bp) { - unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; - } - return 0; - } - if (bp) { - attr = bp->attr; - attr.bp_addr = hw_brk.address; - attr.bp_len = DABR_MAX_LEN; - arch_bp_generic_fields(hw_brk.type, &attr.bp_type); - - /* Enable breakpoint */ - attr.disabled = false; - - ret = modify_user_hw_breakpoint(bp, &attr); - if (ret) { - return ret; - } - thread->ptrace_bps[0] = bp; - thread->hw_brk = hw_brk; - return 0; - } - - /* Create a new breakpoint request if one doesn't exist already */ - hw_breakpoint_init(&attr); - attr.bp_addr = hw_brk.address; - attr.bp_len = DABR_MAX_LEN; - arch_bp_generic_fields(hw_brk.type, - &attr.bp_type); - - thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, - ptrace_triggered, NULL, task); - if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; - return PTR_ERR(bp); - } - -#else /* !CONFIG_HAVE_HW_BREAKPOINT */ - if (set_bp && (!ppc_breakpoint_available())) - return -ENODEV; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - task->thread.hw_brk = hw_brk; -#else /* CONFIG_PPC_ADV_DEBUG_REGS */ - /* As described above, it was assumed 3 bits were passed with the data - * address, but we will assume only the mode bits will be passed - * as to not cause alignment restrictions for DAC-based processors. 
- */ - - /* DAC's hold the whole address without any mode flags */ - task->thread.debug.dac1 = data & ~0x3UL; - - if (task->thread.debug.dac1 == 0) { - dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); - if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, - task->thread.debug.dbcr1)) { - task->thread.regs->msr &= ~MSR_DE; - task->thread.debug.dbcr0 &= ~DBCR0_IDM; - } - return 0; - } - - /* Read or Write bits must be set */ - - if (!(data & 0x3UL)) - return -EINVAL; - - /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 - register */ - task->thread.debug.dbcr0 |= DBCR0_IDM; - - /* Check for write and read flags and set DBCR0 - accordingly */ - dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W); - if (data & 0x1UL) - dbcr_dac(task) |= DBCR_DAC1R; - if (data & 0x2UL) - dbcr_dac(task) |= DBCR_DAC1W; - task->thread.regs->msr |= MSR_DE; -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - return 0; -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure single step bits etc are not set. - */ -void ptrace_disable(struct task_struct *child) -{ - /* make sure the single step bit is not set. */ - user_disable_single_step(child); -} - -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -static long set_instruction_bp(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ - int slot; - int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); - int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); - int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); - int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); - - if (dbcr_iac_range(child) & DBCR_IAC12MODE) - slot2_in_use = 1; - if (dbcr_iac_range(child) & DBCR_IAC34MODE) - slot4_in_use = 1; - - if (bp_info->addr >= TASK_SIZE) - return -EIO; - - if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { - - /* Make sure range is valid. 
*/ - if (bp_info->addr2 >= TASK_SIZE) - return -EIO; - - /* We need a pair of IAC regsisters */ - if ((!slot1_in_use) && (!slot2_in_use)) { - slot = 1; - child->thread.debug.iac1 = bp_info->addr; - child->thread.debug.iac2 = bp_info->addr2; - child->thread.debug.dbcr0 |= DBCR0_IAC1; - if (bp_info->addr_mode == - PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - dbcr_iac_range(child) |= DBCR_IAC12X; - else - dbcr_iac_range(child) |= DBCR_IAC12I; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - } else if ((!slot3_in_use) && (!slot4_in_use)) { - slot = 3; - child->thread.debug.iac3 = bp_info->addr; - child->thread.debug.iac4 = bp_info->addr2; - child->thread.debug.dbcr0 |= DBCR0_IAC3; - if (bp_info->addr_mode == - PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - dbcr_iac_range(child) |= DBCR_IAC34X; - else - dbcr_iac_range(child) |= DBCR_IAC34I; -#endif - } else - return -ENOSPC; - } else { - /* We only need one. If possible leave a pair free in - * case a range is needed later - */ - if (!slot1_in_use) { - /* - * Don't use iac1 if iac1-iac2 are free and either - * iac3 or iac4 (but not both) are free - */ - if (slot2_in_use || (slot3_in_use == slot4_in_use)) { - slot = 1; - child->thread.debug.iac1 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC1; - goto out; - } - } - if (!slot2_in_use) { - slot = 2; - child->thread.debug.iac2 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC2; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - } else if (!slot3_in_use) { - slot = 3; - child->thread.debug.iac3 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC3; - } else if (!slot4_in_use) { - slot = 4; - child->thread.debug.iac4 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC4; -#endif - } else - return -ENOSPC; - } -out: - child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; - - return slot; -} - -static int del_instruction_bp(struct task_struct *child, int slot) -{ - switch (slot) { - case 1: - if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) - return -ENOENT; - - 
if (dbcr_iac_range(child) & DBCR_IAC12MODE) { - /* address range - clear slots 1 & 2 */ - child->thread.debug.iac2 = 0; - dbcr_iac_range(child) &= ~DBCR_IAC12MODE; - } - child->thread.debug.iac1 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC1; - break; - case 2: - if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC12MODE) - /* used in a range */ - return -EINVAL; - child->thread.debug.iac2 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC2; - break; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - case 3: - if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC34MODE) { - /* address range - clear slots 3 & 4 */ - child->thread.debug.iac4 = 0; - dbcr_iac_range(child) &= ~DBCR_IAC34MODE; - } - child->thread.debug.iac3 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC3; - break; - case 4: - if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC34MODE) - /* Used in a range */ - return -EINVAL; - child->thread.debug.iac4 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC4; - break; -#endif - default: - return -EINVAL; - } - return 0; -} - -static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) -{ - int byte_enable = - (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) - & 0xf; - int condition_mode = - bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; - int slot; - - if (byte_enable && (condition_mode == 0)) - return -EINVAL; - - if (bp_info->addr >= TASK_SIZE) - return -EIO; - - if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { - slot = 1; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - dbcr_dac(child) |= DBCR_DAC1R; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - dbcr_dac(child) |= DBCR_DAC1W; - child->thread.debug.dac1 = (unsigned long)bp_info->addr; -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - if (byte_enable) { - child->thread.debug.dvc1 = - (unsigned 
long)bp_info->condition_value; - child->thread.debug.dbcr2 |= - ((byte_enable << DBCR2_DVC1BE_SHIFT) | - (condition_mode << DBCR2_DVC1M_SHIFT)); - } -#endif -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { - /* Both dac1 and dac2 are part of a range */ - return -ENOSPC; -#endif - } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { - slot = 2; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - dbcr_dac(child) |= DBCR_DAC2R; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - dbcr_dac(child) |= DBCR_DAC2W; - child->thread.debug.dac2 = (unsigned long)bp_info->addr; -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - if (byte_enable) { - child->thread.debug.dvc2 = - (unsigned long)bp_info->condition_value; - child->thread.debug.dbcr2 |= - ((byte_enable << DBCR2_DVC2BE_SHIFT) | - (condition_mode << DBCR2_DVC2M_SHIFT)); - } -#endif - } else - return -ENOSPC; - child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; - - return slot + 4; -} - -static int del_dac(struct task_struct *child, int slot) -{ - if (slot == 1) { - if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) - return -ENOENT; - - child->thread.debug.dac1 = 0; - dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { - child->thread.debug.dac2 = 0; - child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; - } - child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); -#endif -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.debug.dvc1 = 0; -#endif - } else if (slot == 2) { - if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) - return -ENOENT; - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) - /* Part of a range */ - return -EINVAL; - child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); -#endif -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.debug.dvc2 = 0; -#endif - 
child->thread.debug.dac2 = 0; - dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); - } else - return -EINVAL; - - return 0; -} -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE -static int set_dac_range(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ - int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; - - /* We don't allow range watchpoints to be used with DVC */ - if (bp_info->condition_mode) - return -EINVAL; - - /* - * Best effort to verify the address range. The user/supervisor bits - * prevent trapping in kernel space, but let's fail on an obvious bad - * range. The simple test on the mask is not fool-proof, and any - * exclusive range will spill over into kernel space. - */ - if (bp_info->addr >= TASK_SIZE) - return -EIO; - if (mode == PPC_BREAKPOINT_MODE_MASK) { - /* - * dac2 is a bitmask. Don't allow a mask that makes a - * kernel space address from a valid dac1 value - */ - if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) - return -EIO; - } else { - /* - * For range breakpoints, addr2 must also be a valid address - */ - if (bp_info->addr2 >= TASK_SIZE) - return -EIO; - } - - if (child->thread.debug.dbcr0 & - (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) - return -ENOSPC; - - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); - child->thread.debug.dac1 = bp_info->addr; - child->thread.debug.dac2 = bp_info->addr2; - if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - child->thread.debug.dbcr2 |= DBCR2_DAC12M; - else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - child->thread.debug.dbcr2 |= DBCR2_DAC12MX; - else /* PPC_BREAKPOINT_MODE_MASK */ - child->thread.debug.dbcr2 |= DBCR2_DAC12MM; - child->thread.regs->msr |= MSR_DE; - - return 5; -} -#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ - -static long 
ppc_set_hwdebug(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int len = 0; - struct thread_struct *thread = &(child->thread); - struct perf_event *bp; - struct perf_event_attr attr; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - struct arch_hw_breakpoint brk; -#endif - - if (bp_info->version != 1) - return -ENOTSUPP; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - /* - * Check for invalid flags and combinations - */ - if ((bp_info->trigger_type == 0) || - (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | - PPC_BREAKPOINT_TRIGGER_RW)) || - (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || - (bp_info->condition_mode & - ~(PPC_BREAKPOINT_CONDITION_MODE | - PPC_BREAKPOINT_CONDITION_BE_ALL))) - return -EINVAL; -#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 - if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) - return -EINVAL; -#endif - - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { - if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) || - (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) - return -EINVAL; - return set_instruction_bp(child, bp_info); - } - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) - return set_dac(child, bp_info); - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - return set_dac_range(child, bp_info); -#else - return -EINVAL; -#endif -#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */ - /* - * We only support one data breakpoint - */ - if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || - (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || - bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) - return -EINVAL; - - if ((unsigned long)bp_info->addr >= TASK_SIZE) - return -EIO; - - brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; - brk.type = HW_BRK_TYPE_TRANSLATE; - brk.len = DABR_MAX_LEN; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - brk.type |= HW_BRK_TYPE_READ; - if (bp_info->trigger_type & 
PPC_BREAKPOINT_TRIGGER_WRITE) - brk.type |= HW_BRK_TYPE_WRITE; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - len = bp_info->addr2 - bp_info->addr; - else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) - len = 1; - else - return -EINVAL; - bp = thread->ptrace_bps[0]; - if (bp) - return -ENOSPC; - - /* Create a new breakpoint request if one doesn't exist already */ - hw_breakpoint_init(&attr); - attr.bp_addr = (unsigned long)bp_info->addr; - attr.bp_len = len; - arch_bp_generic_fields(brk.type, &attr.bp_type); - - thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, - ptrace_triggered, NULL, child); - if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; - return PTR_ERR(bp); - } - - return 1; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - - if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) - return -EINVAL; - - if (child->thread.hw_brk.address) - return -ENOSPC; - - if (!ppc_breakpoint_available()) - return -ENODEV; - - child->thread.hw_brk = brk; - - return 1; -#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ -} - -static long ppc_del_hwdebug(struct task_struct *child, long data) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int ret = 0; - struct thread_struct *thread = &(child->thread); - struct perf_event *bp; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - int rc; - - if (data <= 4) - rc = del_instruction_bp(child, (int)data); - else - rc = del_dac(child, (int)data - 4); - - if (!rc) { - if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, - child->thread.debug.dbcr1)) { - child->thread.debug.dbcr0 &= ~DBCR0_IDM; - child->thread.regs->msr &= ~MSR_DE; - } - } - return rc; -#else - if (data != 1) - return -EINVAL; - -#ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; - if (bp) { - unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; - } else - ret = -ENOENT; - return ret; -#else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk.address == 0) - return -ENOENT; 
- - child->thread.hw_brk.address = 0; - child->thread.hw_brk.type = 0; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - - return 0; -#endif -} - -long arch_ptrace(struct task_struct *child, long request, - unsigned long addr, unsigned long data) -{ - int ret = -EPERM; - void __user *datavp = (void __user *) data; - unsigned long __user *datalp = datavp; - - switch (request) { - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long index, tmp; - - ret = -EIO; - /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif - break; - - CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { - ret = ptrace_get_reg(child, (int) index, &tmp); - if (ret) - break; - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); - else - tmp = child->thread.fp_state.fpscr; - } - ret = put_user(tmp, datalp); - break; - } - - /* write the word at location addr in the USER area */ - case PTRACE_POKEUSR: { - unsigned long index; - - ret = -EIO; - /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif - break; - - CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { - ret = ptrace_put_reg(child, index, data); - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); - else - child->thread.fp_state.fpscr = data; - ret = 0; - } - break; - } - - case PPC_PTRACE_GETHWDBGINFO: { - struct ppc_debug_info dbginfo; - - dbginfo.version = 1; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - 
dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; - dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; - dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; - dbginfo.data_bp_alignment = 4; - dbginfo.sizeof_condition = 4; - dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | - PPC_DEBUG_FEATURE_INSN_BP_MASK; -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - dbginfo.features |= - PPC_DEBUG_FEATURE_DATA_BP_RANGE | - PPC_DEBUG_FEATURE_DATA_BP_MASK; -#endif -#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ - dbginfo.num_instruction_bps = 0; - if (ppc_breakpoint_available()) - dbginfo.num_data_bps = 1; - else - dbginfo.num_data_bps = 0; - dbginfo.num_condition_regs = 0; -#ifdef CONFIG_PPC64 - dbginfo.data_bp_alignment = 8; -#else - dbginfo.data_bp_alignment = 4; -#endif - dbginfo.sizeof_condition = 0; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; - if (dawr_enabled()) - dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; -#else - dbginfo.features = 0; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - - if (copy_to_user(datavp, &dbginfo, - sizeof(struct ppc_debug_info))) - return -EFAULT; - return 0; - } - - case PPC_PTRACE_SETHWDEBUG: { - struct ppc_hw_breakpoint bp_info; - - if (copy_from_user(&bp_info, datavp, - sizeof(struct ppc_hw_breakpoint))) - return -EFAULT; - return ppc_set_hwdebug(child, &bp_info); - } - - case PPC_PTRACE_DELHWDEBUG: { - ret = ppc_del_hwdebug(child, data); - break; - } - - case PTRACE_GET_DEBUGREG: { -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long dabr_fake; -#endif - ret = -EINVAL; - /* We only support one DABR and no IABRS at the moment */ - if (addr > 0) - break; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.debug.dac1, datalp); -#else - dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); - ret = put_user(dabr_fake, datalp); -#endif - break; - } - - case PTRACE_SET_DEBUGREG: - ret = 
ptrace_set_debugreg(child, addr, data); - break; - -#ifdef CONFIG_PPC64 - case PTRACE_GETREGS64: -#endif - case PTRACE_GETREGS: /* Get all pt_regs from the child. */ - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_GPR, - 0, sizeof(struct user_pt_regs), - datavp); - -#ifdef CONFIG_PPC64 - case PTRACE_SETREGS64: -#endif - case PTRACE_SETREGS: /* Set all gp regs in the child. */ - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_GPR, - 0, sizeof(struct user_pt_regs), - datavp); - - case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_FPR, - 0, sizeof(elf_fpregset_t), - datavp); - - case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */ - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_FPR, - 0, sizeof(elf_fpregset_t), - datavp); - -#ifdef CONFIG_ALTIVEC - case PTRACE_GETVRREGS: - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_VMX, - 0, (33 * sizeof(vector128) + - sizeof(u32)), - datavp); - - case PTRACE_SETVRREGS: - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_VMX, - 0, (33 * sizeof(vector128) + - sizeof(u32)), - datavp); -#endif -#ifdef CONFIG_VSX - case PTRACE_GETVSRREGS: - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_VSX, - 0, 32 * sizeof(double), - datavp); - - case PTRACE_SETVSRREGS: - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_VSX, - 0, 32 * sizeof(double), - datavp); -#endif -#ifdef CONFIG_SPE - case PTRACE_GETEVRREGS: - /* Get the child spe register state. */ - return copy_regset_to_user(child, &user_ppc_native_view, - REGSET_SPE, 0, 35 * sizeof(u32), - datavp); - - case PTRACE_SETEVRREGS: - /* Set the child spe register state. 
*/ - return copy_regset_from_user(child, &user_ppc_native_view, - REGSET_SPE, 0, 35 * sizeof(u32), - datavp); -#endif - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - return ret; -} - -#ifdef CONFIG_SECCOMP -static int do_seccomp(struct pt_regs *regs) -{ - if (!test_thread_flag(TIF_SECCOMP)) - return 0; - - /* - * The ABI we present to seccomp tracers is that r3 contains - * the syscall return value and orig_gpr3 contains the first - * syscall parameter. This is different to the ptrace ABI where - * both r3 and orig_gpr3 contain the first syscall parameter. - */ - regs->gpr[3] = -ENOSYS; - - /* - * We use the __ version here because we have already checked - * TIF_SECCOMP. If this fails, there is nothing left to do, we - * have already loaded -ENOSYS into r3, or seccomp has put - * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). - */ - if (__secure_computing(NULL)) - return -1; - - /* - * The syscall was allowed by seccomp, restore the register - * state to what audit expects. - * Note that we use orig_gpr3, which means a seccomp tracer can - * modify the first syscall parameter (in orig_gpr3) and also - * allow the syscall to proceed. - */ - regs->gpr[3] = regs->orig_gpr3; - - return 0; -} -#else -static inline int do_seccomp(struct pt_regs *regs) { return 0; } -#endif /* CONFIG_SECCOMP */ - -/** - * do_syscall_trace_enter() - Do syscall tracing on kernel entry. - * @regs: the pt_regs of the task to trace (current) - * - * Performs various types of tracing on syscall entry. This includes seccomp, - * ptrace, syscall tracepoints and audit. - * - * The pt_regs are potentially visible to userspace via ptrace, so their - * contents is ABI. - * - * One or more of the tracers may modify the contents of pt_regs, in particular - * to modify arguments or even the syscall number itself. - * - * It's also possible that a tracer can choose to reject the system call. 
In - * that case this function will return an illegal syscall number, and will put - * an appropriate return value in regs->r3. - * - * Return: the (possibly changed) syscall number. - */ -long do_syscall_trace_enter(struct pt_regs *regs) -{ - u32 flags; - - user_exit(); - - flags = READ_ONCE(current_thread_info()->flags) & - (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); - - if (flags) { - int rc = tracehook_report_syscall_entry(regs); - - if (unlikely(flags & _TIF_SYSCALL_EMU)) { - /* - * A nonzero return code from - * tracehook_report_syscall_entry() tells us to prevent - * the syscall execution, but we are not going to - * execute it anyway. - * - * Returning -1 will skip the syscall execution. We want - * to avoid clobbering any registers, so we don't goto - * the skip label below. - */ - return -1; - } - - if (rc) { - /* - * The tracer decided to abort the syscall. Note that - * the tracer may also just change regs->gpr[0] to an - * invalid syscall number, that is handled below on the - * exit path. - */ - goto skip; - } - } - - /* Run seccomp after ptrace; allow it to set gpr[3]. */ - if (do_seccomp(regs)) - return -1; - - /* Avoid trace and audit when syscall is invalid. */ - if (regs->gpr[0] >= NR_syscalls) - goto skip; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gpr[0]); - -#ifdef CONFIG_PPC64 - if (!is_32bit_task()) - audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], - regs->gpr[5], regs->gpr[6]); - else -#endif - audit_syscall_entry(regs->gpr[0], - regs->gpr[3] & 0xffffffff, - regs->gpr[4] & 0xffffffff, - regs->gpr[5] & 0xffffffff, - regs->gpr[6] & 0xffffffff); - - /* Return the possibly modified but valid syscall number */ - return regs->gpr[0]; - -skip: - /* - * If we are aborting explicitly, or if the syscall number is - * now invalid, set the return value to -ENOSYS. 
- */ - regs->gpr[3] = -ENOSYS; - return -1; -} - -void do_syscall_trace_leave(struct pt_regs *regs) -{ - int step; - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->result); - - step = test_thread_flag(TIF_SINGLESTEP); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); - - user_enter(); -} - -void __init pt_regs_check(void); - -/* - * Dummy function, its purpose is to break the build if struct pt_regs and - * struct user_pt_regs don't match. - */ -void __init pt_regs_check(void) -{ - BUILD_BUG_ON(offsetof(struct pt_regs, gpr) != - offsetof(struct user_pt_regs, gpr)); - BUILD_BUG_ON(offsetof(struct pt_regs, nip) != - offsetof(struct user_pt_regs, nip)); - BUILD_BUG_ON(offsetof(struct pt_regs, msr) != - offsetof(struct user_pt_regs, msr)); - BUILD_BUG_ON(offsetof(struct pt_regs, msr) != - offsetof(struct user_pt_regs, msr)); - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct user_pt_regs, orig_gpr3)); - BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != - offsetof(struct user_pt_regs, ctr)); - BUILD_BUG_ON(offsetof(struct pt_regs, link) != - offsetof(struct user_pt_regs, link)); - BUILD_BUG_ON(offsetof(struct pt_regs, xer) != - offsetof(struct user_pt_regs, xer)); - BUILD_BUG_ON(offsetof(struct pt_regs, ccr) != - offsetof(struct user_pt_regs, ccr)); -#ifdef __powerpc64__ - BUILD_BUG_ON(offsetof(struct pt_regs, softe) != - offsetof(struct user_pt_regs, softe)); -#else - BUILD_BUG_ON(offsetof(struct pt_regs, mq) != - offsetof(struct user_pt_regs, mq)); -#endif - BUILD_BUG_ON(offsetof(struct pt_regs, trap) != - offsetof(struct user_pt_regs, trap)); - BUILD_BUG_ON(offsetof(struct pt_regs, dar) != - offsetof(struct user_pt_regs, dar)); - BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != - offsetof(struct user_pt_regs, dsisr)); - BUILD_BUG_ON(offsetof(struct pt_regs, result) != - offsetof(struct user_pt_regs, result)); - - 
BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); - - // Now check that the pt_regs offsets match the uapi #defines - #define CHECK_REG(_pt, _reg) \ - BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \ - sizeof(unsigned long))); - - CHECK_REG(PT_R0, gpr[0]); - CHECK_REG(PT_R1, gpr[1]); - CHECK_REG(PT_R2, gpr[2]); - CHECK_REG(PT_R3, gpr[3]); - CHECK_REG(PT_R4, gpr[4]); - CHECK_REG(PT_R5, gpr[5]); - CHECK_REG(PT_R6, gpr[6]); - CHECK_REG(PT_R7, gpr[7]); - CHECK_REG(PT_R8, gpr[8]); - CHECK_REG(PT_R9, gpr[9]); - CHECK_REG(PT_R10, gpr[10]); - CHECK_REG(PT_R11, gpr[11]); - CHECK_REG(PT_R12, gpr[12]); - CHECK_REG(PT_R13, gpr[13]); - CHECK_REG(PT_R14, gpr[14]); - CHECK_REG(PT_R15, gpr[15]); - CHECK_REG(PT_R16, gpr[16]); - CHECK_REG(PT_R17, gpr[17]); - CHECK_REG(PT_R18, gpr[18]); - CHECK_REG(PT_R19, gpr[19]); - CHECK_REG(PT_R20, gpr[20]); - CHECK_REG(PT_R21, gpr[21]); - CHECK_REG(PT_R22, gpr[22]); - CHECK_REG(PT_R23, gpr[23]); - CHECK_REG(PT_R24, gpr[24]); - CHECK_REG(PT_R25, gpr[25]); - CHECK_REG(PT_R26, gpr[26]); - CHECK_REG(PT_R27, gpr[27]); - CHECK_REG(PT_R28, gpr[28]); - CHECK_REG(PT_R29, gpr[29]); - CHECK_REG(PT_R30, gpr[30]); - CHECK_REG(PT_R31, gpr[31]); - CHECK_REG(PT_NIP, nip); - CHECK_REG(PT_MSR, msr); - CHECK_REG(PT_ORIG_R3, orig_gpr3); - CHECK_REG(PT_CTR, ctr); - CHECK_REG(PT_LNK, link); - CHECK_REG(PT_XER, xer); - CHECK_REG(PT_CCR, ccr); -#ifdef CONFIG_PPC64 - CHECK_REG(PT_SOFTE, softe); -#else - CHECK_REG(PT_MQ, mq); -#endif - CHECK_REG(PT_TRAP, trap); - CHECK_REG(PT_DAR, dar); - CHECK_REG(PT_DSISR, dsisr); - CHECK_REG(PT_RESULT, result); - #undef CHECK_REG - - BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long)); - - /* - * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the - * real registers. 
- */ - BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); -} diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile new file mode 100644 index 000000000000..02fb28eb3b55 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux kernel. +# + +CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' + +obj-y += ptrace.o +obj-$(CONFIG_PPC64) += ptrace32.o diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c new file mode 100644 index 000000000000..25c0424e8868 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -0,0 +1,3468 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@samba.org). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file README.legal in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +/* + * The parameter save area on the stack is used to store arguments being passed + * to callee function and is located at fixed offset from stack pointer. 
+ */ +#ifdef CONFIG_PPC32 +#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ +#else /* CONFIG_PPC32 */ +#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ +#endif + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define STR(s) #s /* convert to string */ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define GPR_OFFSET_NAME(num) \ + {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ + {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +#define TVSO(f) (offsetof(struct thread_vr_state, f)) +#define TFSO(f) (offsetof(struct thread_fp_state, f)) +#define TSO(f) (offsetof(struct thread_struct, f)) + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + GPR_OFFSET_NAME(31), + REG_OFFSET_NAME(nip), + REG_OFFSET_NAME(msr), + REG_OFFSET_NAME(ctr), + REG_OFFSET_NAME(link), + REG_OFFSET_NAME(xer), + REG_OFFSET_NAME(ccr), +#ifdef CONFIG_PPC64 + REG_OFFSET_NAME(softe), +#else + REG_OFFSET_NAME(mq), +#endif + REG_OFFSET_NAME(trap), + REG_OFFSET_NAME(dar), + REG_OFFSET_NAME(dsisr), + REG_OFFSET_END, +}; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * If task is not current, it 
will have been flushed already to + * it's thread_struct during __switch_to(). + * + * A reclaim flushes ALL the state or if not in TM save TM SPRs + * in the appropriate thread structures from live. + */ + + if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current)) + return; + + if (MSR_TM_SUSPENDED(mfmsr())) { + tm_reclaim_current(TM_CAUSE_SIGNAL); + } else { + tm_enable(); + tm_save_sprs(&(tsk->thread)); + } +} +#else +static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } +#endif + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Set of msr bits that gdb can change on behalf of a process. 
+ */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#define MSR_DEBUGCHANGE 0 +#else +#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) +#endif + +/* + * Max register writeable via put_reg + */ +#ifdef CONFIG_PPC32 +#define PT_MAX_PUT_REG PT_MQ +#else +#define PT_MAX_PUT_REG PT_CCR +#endif + +static unsigned long get_user_msr(struct task_struct *task) +{ + return task->thread.regs->msr | task->thread.fpexc_mode; +} + +static int set_user_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.regs->msr &= ~MSR_DEBUGCHANGE; + task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static unsigned long get_user_ckpt_msr(struct task_struct *task) +{ + return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; +} + +static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; + task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.ckpt_regs.trap = trap & 0xfff0; + return 0; +} +#endif + +#ifdef CONFIG_PPC64 +static int get_user_dscr(struct task_struct *task, unsigned long *data) +{ + *data = task->thread.dscr; + return 0; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + task->thread.dscr = dscr; + task->thread.dscr_inherit = 1; + return 0; +} +#else +static int get_user_dscr(struct task_struct *task, unsigned long *data) +{ + return -EIO; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + return -EIO; +} +#endif + +/* + * We prevent mucking around with the reserved area of trap + * which are used internally by the kernel. + */ +static int set_user_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.regs->trap = trap & 0xfff0; + return 0; +} + +/* + * Get contents of register REGNO in task TASK. 
+ */ +int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) +{ + unsigned int regs_max; + + if ((task->thread.regs == NULL) || !data) + return -EIO; + + if (regno == PT_MSR) { + *data = get_user_msr(task); + return 0; + } + + if (regno == PT_DSCR) + return get_user_dscr(task, data); + +#ifdef CONFIG_PPC64 + /* + * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is + * no more used as a flag, lets force usr to alway see the softe value as 1 + * which means interrupts are not soft disabled. + */ + if (regno == PT_SOFTE) { + *data = 1; + return 0; + } +#endif + + regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long); + if (regno < regs_max) { + regno = array_index_nospec(regno, regs_max); + *data = ((unsigned long *)task->thread.regs)[regno]; + return 0; + } + + return -EIO; +} + +/* + * Write contents of register REGNO in task TASK. + */ +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) +{ + if (task->thread.regs == NULL) + return -EIO; + + if (regno == PT_MSR) + return set_user_msr(task, data); + if (regno == PT_TRAP) + return set_user_trap(task, data); + if (regno == PT_DSCR) + return set_user_dscr(task, data); + + if (regno <= PT_MAX_PUT_REG) { + regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1); + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; +} + +static int gpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int i, ret; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. 
Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_msr(target); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct user_pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct user_pt_regs), -1); + + return ret; +} + +static int gpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_TRAP *
sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + */ +static int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ +#ifdef CONFIG_VSX + u64 buf[33]; + int i; + + flush_fp_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +#else + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_state, 0, -1); +#endif +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. 
+ * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + */ +static int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ +#ifdef CONFIG_VSX + u64 buf[33]; + int i; + + flush_fp_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + + for (i = 0; i < 32 ; i++) + target->thread.TS_FPR(i) = buf[i]; + target->thread.fp_state.fpscr = buf[32]; + return 0; +#else + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_state, 0, -1); +#endif +} + +#ifdef CONFIG_ALTIVEC +/* + * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * The transfer totals 34 quadword. Quadwords 0-31 contain the + * corresponding vector registers. Quadword 32 contains the vscr as the + * last word (offset 12) within that quadword. Quadword 33 contains the + * vrsave as the first word (offset 0) within the quadword. + * + * This definition of the VMX state is compatible with the current PPC32 + * ptrace interface. This allows signal handling and ptrace to use the + * same structures. This also simplifies the implementation of a bi-arch + * (combined (32- and 64-bit) gdb. + */ + +static int vr_active(struct task_struct *target, + const struct user_regset *regset) +{ + flush_altivec_to_thread(target); + return target->thread.used_vr ? 
regset->n : 0; +} + +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ +static int vr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + int start, end; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + + vrsave.word = target->thread.vrsave; + + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + start, end); + } + + return ret; +} + +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. 
+ * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ +static int vr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the first word of vrsave. + */ + int start, end; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + + vrsave.word = target->thread.vrsave; + + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + start, end); + if (!ret) + target->thread.vrsave = vrsave.word; + } + + return ret; +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +/* + * Currently to set and and get all the vsx state, you need to call + * the fp and VMX calls as well. This only get/sets the lower 32 + * 128bit VSX registers. + */ + +static int vsr_active(struct task_struct *target, + const struct user_regset *regset) +{ + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. 
+ * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ +static int vsr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ +static int vsr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret,i; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_SPE + +/* + * For get_evrregs/set_evrregs functions 'data' has the following layout: + * + * struct { + * u32 evr[32]; + * u64 acc; + * u32 spefscr; + * } + */ + +static int evr_active(struct task_struct *target, + const struct user_regset *regset) +{ + flush_spe_to_thread(target); + return target->thread.used_spe ? 
regset->n : 0; +} + +static int evr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} + +static int evr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} +#endif /* CONFIG_SPE */ + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/** + * tm_cgpr_active - get active number of registers in CGPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed GPR category. + */ +static int tm_cgpr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cgpr_get - get CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. 
+ * @ubuf: User buffer to copy into. + * + * This function gets transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds all the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function gets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +static int tm_cgpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_ckpt_msr(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct user_pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct user_pt_regs), -1); + + return ret; +} + +/* + * tm_cgpr_set - set the CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed GPR registers. 
+ * + * When the transaction is active, 'ckpt_regs' holds the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function sets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +static int tm_cgpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_TRAP * sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore( + &pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +/** + * tm_cfpr_active - get active number of registers in CFPR + * @target: The target task.
+ * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed FPR category. + */ +static int tm_cfpr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cfpr_get - get CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed FPR registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed FPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +static int tm_cfpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +} + +/** + * tm_cfpr_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. 
+ * + * This function sets in transaction checkpointed FPR registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * FPR register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +static int tm_cfpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + for (i = 0; i < 32; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + for (i = 0; i < 32 ; i++) + target->thread.TS_CKFPR(i) = buf[i]; + target->thread.ckfp_state.fpscr = buf[32]; + return 0; +} + +/** + * tm_cvmx_active - get active number of registers in CVMX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in checkpointed VMX category. + */ +static int tm_cvmx_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cvmx_get - get CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. 
+ * + * This function gets in transaction checkpointed VMX registers. + * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +static int tm_cvmx_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckvr_state, 0, + 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + } + + return ret; +} + +/** + * tm_cvmx_set - set CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VMX registers. + * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. 
+ * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +static int tm_cvmx_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckvr_state, 0, + 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + if (!ret) + target->thread.ckvrsave = vrsave.word; + } + + return ret; +} + +/** + * tm_cvsx_active - get active number of registers in CVSX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed VSX category. + */ +static int tm_cvsx_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/** + * tm_cvsx_get - get CVSX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VSX registers. 
+ * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed VSX registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +static int tm_cvsx_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/** + * tm_cvsx_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VSX registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * VSX register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. 
+ * + * struct data { + * u64 vsx[32]; + *}; + */ +static int tm_cvsx_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} + +/** + * tm_spr_active - get active number of registers in TM SPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks the active number of available + * regisers in the transactional memory SPR category. + */ +static int tm_spr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + return regset->n; +} + +/** + * tm_spr_get - get the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. 
+ * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +static int tm_spr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +/** + * tm_spr_set - set the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. 
+ * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +static int tm_spr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +static int tm_tar_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +static int tm_tar_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +static int tm_tar_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + 
return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +static int tm_ppr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + + +static int tm_ppr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +static int tm_ppr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +static int tm_dscr_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +static int tm_dscr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} + +static int tm_dscr_set(struct task_struct *target, 
+ const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +#ifdef CONFIG_PPC64 +static int ppr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->ppr, 0, sizeof(u64)); +} + +static int ppr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->ppr, 0, sizeof(u64)); +} + +static int dscr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); +} +static int dscr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); +} +#endif +#ifdef CONFIG_PPC_BOOK3S_64 +static int tar_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); +} +static int tar_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + 
return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); +} + +static int ebb_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return regset->n; + + return 0; +} + +static int ebb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (!target->thread.used_ebb) + return -ENODATA; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbrr, 0, 3 * sizeof(unsigned long)); +} + +static int ebb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbrr, 0, sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbhr, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.bescr, + 2 * sizeof(unsigned long), 3 * sizeof(unsigned long)); + + return ret; +} +static int pmu_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return regset->n; +} + +static int pmu_get(struct task_struct *target, + const struct 
user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.siar, 0, + 5 * sizeof(unsigned long)); +} + +static int pmu_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.siar, 0, + sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sdar, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sier, 2 * sizeof(unsigned long), + 3 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr2, 3 * sizeof(unsigned long), + 4 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr0, 4 * sizeof(unsigned long), + 5 * sizeof(unsigned long)); + return ret; +} +#endif + +#ifdef CONFIG_PPC_MEM_KEYS +static int pkey_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!arch_pkeys_enabled()) + return -ENODEV; + + 
return regset->n; +} + +static int pkey_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); + BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); + + if (!arch_pkeys_enabled()) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.amr, 0, + ELF_NPKEY * sizeof(unsigned long)); +} + +static int pkey_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 new_amr; + int ret; + + if (!arch_pkeys_enabled()) + return -ENODEV; + + /* Only the AMR can be set from userspace */ + if (pos != 0 || count != sizeof(new_amr)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_amr, 0, sizeof(new_amr)); + if (ret) + return ret; + + /* UAMOR determines which bits of the AMR can be set from userspace. */ + target->thread.amr = (new_amr & target->thread.uamor) | + (target->thread.amr & ~target->thread.uamor); + + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + +/* + * These are our native regset flavors. 
+ */ +enum powerpc_regset { + REGSET_GPR, + REGSET_FPR, +#ifdef CONFIG_ALTIVEC + REGSET_VMX, +#endif +#ifdef CONFIG_VSX + REGSET_VSX, +#endif +#ifdef CONFIG_SPE + REGSET_SPE, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + REGSET_TM_CGPR, /* TM checkpointed GPR registers */ + REGSET_TM_CFPR, /* TM checkpointed FPR registers */ + REGSET_TM_CVMX, /* TM checkpointed VMX registers */ + REGSET_TM_CVSX, /* TM checkpointed VSX registers */ + REGSET_TM_SPR, /* TM specific SPR registers */ + REGSET_TM_CTAR, /* TM checkpointed TAR register */ + REGSET_TM_CPPR, /* TM checkpointed PPR register */ + REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ +#endif +#ifdef CONFIG_PPC64 + REGSET_PPR, /* PPR register */ + REGSET_DSCR, /* DSCR register */ +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + REGSET_TAR, /* TAR register */ + REGSET_EBB, /* EBB registers */ + REGSET_PMR, /* Performance Monitor Registers */ +#endif +#ifdef CONFIG_PPC_MEM_KEYS + REGSET_PKEY, /* AMR register */ +#endif +}; + +static const struct user_regset native_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .get = gpr_get, .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_VSX + [REGSET_VSX] = { + .core_note_type = NT_PPC_VSX, .n = 32, + .size = sizeof(double), .align = sizeof(double), + .active = vsr_active, .get = vsr_get, .set = vsr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .core_note_type = NT_PPC_SPE, .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { 
+ .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set 
+ }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, + [REGSET_PMR] = { + .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, + .size = sizeof(u64), .align = sizeof(u64), + .active = pmu_active, .get = pmu_get, .set = pmu_set + }, +#endif +#ifdef CONFIG_PPC_MEM_KEYS + [REGSET_PKEY] = { + .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + .size = sizeof(u64), .align = sizeof(u64), + .active = pkey_active, .get = pkey_get, .set = pkey_set + }, +#endif +}; + +static const struct user_regset_view user_ppc_native_view = { + .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +#ifdef CONFIG_PPC64 +#include + +static int gpr32_get_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + unsigned long *regs) +{ + compat_ulong_t *k = kbuf; + compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_MSR; --count) + if (__put_user((compat_ulong_t) regs[pos++], u++)) + return -EFAULT; + + if (count > 0 && pos == PT_MSR) { + reg = get_user_msr(target); + if (kbuf) + *k++ = reg; + else if (__put_user(reg, u++)) + return -EFAULT; + ++pos; + --count; + } + + if (kbuf) + for (; count > 0 && pos < PT_REGS_COUNT; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_REGS_COUNT; --count) + if (__put_user((compat_ulong_t) regs[pos++], u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + PT_REGS_COUNT * sizeof(reg), -1); +} + +static int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + 
unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs) +{ + const compat_ulong_t *k = kbuf; + const compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + else + for (; count > 0 && pos < PT_MSR; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + + + if (count > 0 && pos == PT_MSR) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_msr(target, reg); + ++pos; + --count; + } + + if (kbuf) { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + } else { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + if (__get_user(reg, u++)) + return -EFAULT; + } + + if (count > 0 && pos == PT_TRAP) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_trap(target, reg); + ++pos; + --count; + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static int tm_cgpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} + +static int tm_cgpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} +#endif /* 
CONFIG_PPC_TRANSACTIONAL_MEM */ +/* + * gpr32_get() - .get handler used by compat_regsets[REGSET_GPR]. + * + * Returns -EIO when the target has no saved register frame + * (thread.regs is NULL). Otherwise passes the live pt_regs GPR array to + * gpr32_get_common(), which copies each register out as a compat_ulong_t. + * + * Despite being a getter this can modify the target: when FULL_REGS() + * reports a partial frame, gpr[14..31] are overwritten with + * NV_REG_POISON before the copy-out. + */ +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int i; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* + * We have a partial register set. + * Fill 14-31 with bogus values. + */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} +/* + * gpr32_set() - .set handler used by compat_regsets[REGSET_GPR]. + * + * Returns -EIO when thread.regs is NULL. Otherwise forwards to + * gpr32_set_common() with the live pt_regs GPR array and returns its + * result. + * + * NOTE(review): CHECK_FULL_REGS() is defined outside this chunk; it is + * presumably a sanity check that the frame is complete -- confirm. + */ +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} + +/* + * These are the regset flavors matching the CONFIG_PPC32 native set.
+ */ +static const struct user_regset compat_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), + .get = gpr32_get, .set = gpr32_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .core_note_type = NT_PPC_SPE, .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, + .get = tm_cgpr32_get, .set = tm_cgpr32_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + 
[REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, +#endif +}; + +static const struct user_regset_view user_ppc_compat_view = { + .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, + .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) +}; +#endif /* CONFIG_PPC64 */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_PPC64 + if (test_tsk_thread_flag(task, TIF_32BIT)) + return &user_ppc_compat_view; +#endif + return &user_ppc_native_view; +} + + +void user_enable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + task->thread.debug.dbcr0 &= ~DBCR0_BT; + task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; + regs->msr |= MSR_DE; +#else + regs->msr &= ~MSR_BE; + regs->msr |= MSR_SE; +#endif + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs 
!= NULL) { +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + task->thread.debug.dbcr0 &= ~DBCR0_IC; + /* + * OR the block-step bits in rather than assigning: a plain + * assignment would discard the IC clear above and wipe every + * other event bit kept in dbcr0, e.g. the IAC enables set by + * set_instruction_bp() and the DAC enables set by set_dac_range(). + * This mirrors user_enable_single_step(). + */ + task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_BT; + regs->msr |= MSR_DE; +#else + regs->msr &= ~MSR_SE; + regs->msr |= MSR_BE; +#endif + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +/* + * user_disable_single_step() - turn off single-step and block-step for @task. + * + * With CONFIG_PPC_ADV_DEBUG_REGS: clears DBCR0_IC and DBCR0_BT, and only if + * DBCR_ACTIVE_EVENTS() then reports nothing left in dbcr0/dbcr1 also clears + * DBCR0_IDM and MSR_DE. Otherwise: clears MSR_SE and MSR_BE. + * The register updates are skipped when thread.regs is NULL; + * TIF_SINGLESTEP is cleared in every case. + */ +void user_disable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * The logic to disable single stepping should be as + * simple as turning off the Instruction Complete flag. + * And, after doing so, if all debug flags are off, turn + * off DBCR0(IDM) and MSR(DE) .... Torez + */ + task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT); + /* + * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. + */ + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { + /* + * All debug events were off..... + */ + task->thread.debug.dbcr0 &= ~DBCR0_IDM; + regs->msr &= ~MSR_DE; + } +#else + regs->msr &= ~(MSR_SE | MSR_BE); +#endif + } + clear_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +void ptrace_triggered(struct perf_event *bp, + struct perf_sample_data *data, struct pt_regs *regs) +{ + struct perf_event_attr attr; + + /* + * Disable the breakpoint request here since ptrace has defined a + * one-shot behaviour for breakpoint exceptions in PPC64. + * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here + */ + attr = bp->attr; + attr.disabled = true; + modify_user_hw_breakpoint(bp, &attr); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, + unsigned long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret; + struct thread_struct *thread = &(task->thread); + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + bool set_bp = true; + struct arch_hw_breakpoint hw_brk; +#endif + + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits in dabr are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. + * It was assumed, on previous implementations, that 3 bits were + * passed together with the data address, fitting the design of the + * DABR register, as follows: + * + * bit 0: Read flag + * bit 1: Write flag + * bit 2: Breakpoint translation + * + * Thus, we use them here as so. 
+ */ + + /* Ensure breakpoint translation bit is set */ + if (data && !(data & HW_BRK_TYPE_TRANSLATE)) + return -EIO; + hw_brk.address = data & (~HW_BRK_TYPE_DABR); + hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; + hw_brk.len = DABR_MAX_LEN; + hw_brk.hw_len = DABR_MAX_LEN; + set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + bp = thread->ptrace_bps[0]; + if (!set_bp) { + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } + return 0; + } + if (bp) { + attr = bp->attr; + attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; + arch_bp_generic_fields(hw_brk.type, &attr.bp_type); + + /* Enable breakpoint */ + attr.disabled = false; + + ret = modify_user_hw_breakpoint(bp, &attr); + if (ret) { + return ret; + } + thread->ptrace_bps[0] = bp; + thread->hw_brk = hw_brk; + return 0; + } + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; + arch_bp_generic_fields(hw_brk.type, + &attr.bp_type); + + thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, + ptrace_triggered, NULL, task); + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + return PTR_ERR(bp); + } + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + if (set_bp && (!ppc_breakpoint_available())) + return -ENODEV; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + task->thread.hw_brk = hw_brk; +#else /* CONFIG_PPC_ADV_DEBUG_REGS */ + /* As described above, it was assumed 3 bits were passed with the data + * address, but we will assume only the mode bits will be passed + * as to not cause alignment restrictions for DAC-based processors. 
+ */ + + /* DAC's hold the whole address without any mode flags */ + task->thread.debug.dac1 = data & ~0x3UL; + + if (task->thread.debug.dac1 == 0) { + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { + task->thread.regs->msr &= ~MSR_DE; + task->thread.debug.dbcr0 &= ~DBCR0_IDM; + } + return 0; + } + + /* Read or Write bits must be set */ + + if (!(data & 0x3UL)) + return -EINVAL; + + /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 + register */ + task->thread.debug.dbcr0 |= DBCR0_IDM; + + /* Check for write and read flags and set DBCR0 + accordingly */ + dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W); + if (data & 0x1UL) + dbcr_dac(task) |= DBCR_DAC1R; + if (data & 0x2UL) + dbcr_dac(task) |= DBCR_DAC1W; + task->thread.regs->msr |= MSR_DE; +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + return 0; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* make sure the single step bit is not set. */ + user_disable_single_step(child); +} + +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static long set_instruction_bp(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int slot; + int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + slot2_in_use = 1; + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + slot4_in_use = 1; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + + /* Make sure range is valid. 
*/ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + + /* We need a pair of IAC registers */ + if ((!slot1_in_use) && (!slot2_in_use)) { + slot = 1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.iac2 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC1; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC12X; + else + dbcr_iac_range(child) |= DBCR_IAC12I; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if ((!slot3_in_use) && (!slot4_in_use)) { + slot = 3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.iac4 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC3; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC34X; + else + dbcr_iac_range(child) |= DBCR_IAC34I; +#endif + } else + return -ENOSPC; + } else { + /* We only need one. If possible leave a pair free in + * case a range is needed later + */ + if (!slot1_in_use) { + /* + * Don't use iac1 if iac1-iac2 are free and either + * iac3 or iac4 (but not both) are free + */ + if (slot2_in_use || (slot3_in_use == slot4_in_use)) { + slot = 1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC1; + goto out; + } + } + if (!slot2_in_use) { + slot = 2; + child->thread.debug.iac2 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC2; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if (!slot3_in_use) { + slot = 3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC3; + } else if (!slot4_in_use) { + slot = 4; + child->thread.debug.iac4 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC4; +#endif + } else + return -ENOSPC; + } +out: + child->thread.debug.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot; +} +/* + * del_instruction_bp() - release the instruction breakpoint in IAC @slot. + * + * Returns 0 on success, -ENOENT when the slot's enable bit is not set in + * dbcr0, and -EINVAL for an unknown slot number or when slot 2 (or 4) is + * the upper half of an active range. Deleting slot 1 (or 3) of a range + * also clears its partner register and the range mode bits. + */ +static int del_instruction_bp(struct task_struct *child, int slot) +{ + switch (slot) { + case 1: + if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) + return -ENOENT; + +
if (dbcr_iac_range(child) & DBCR_IAC12MODE) { + /* address range - clear slots 1 & 2 */ + child->thread.debug.iac2 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC12MODE; + } + child->thread.debug.iac1 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC1; + break; + case 2: + if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + /* used in a range */ + return -EINVAL; + child->thread.debug.iac2 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 3: + if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) { + /* address range - clear slots 3 & 4 */ + child->thread.debug.iac4 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC34MODE; + } + child->thread.debug.iac3 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC3; + break; + case 4: + if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + /* Used in a range */ + return -EINVAL; + child->thread.debug.iac4 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC4; + break; +#endif + default: + return -EINVAL; + } + return 0; +} + +static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + int byte_enable = + (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) + & 0xf; + int condition_mode = + bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; + int slot; + + if (byte_enable && (condition_mode == 0)) + return -EINVAL; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { + slot = 1; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC1R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC1W; + child->thread.debug.dac1 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.debug.dvc1 = + (unsigned 
long)bp_info->condition_value; + child->thread.debug.dbcr2 |= + ((byte_enable << DBCR2_DVC1BE_SHIFT) | + (condition_mode << DBCR2_DVC1M_SHIFT)); + } +#endif +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + /* Both dac1 and dac2 are part of a range */ + return -ENOSPC; +#endif + } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { + slot = 2; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC2R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC2W; + child->thread.debug.dac2 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.debug.dvc2 = + (unsigned long)bp_info->condition_value; + child->thread.debug.dbcr2 |= + ((byte_enable << DBCR2_DVC2BE_SHIFT) | + (condition_mode << DBCR2_DVC2M_SHIFT)); + } +#endif + } else + return -ENOSPC; + child->thread.debug.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot + 4; +} + +static int del_dac(struct task_struct *child, int slot) +{ + if (slot == 1) { + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) + return -ENOENT; + + child->thread.debug.dac1 = 0; + dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + child->thread.debug.dac2 = 0; + child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; + } + child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.debug.dvc1 = 0; +#endif + } else if (slot == 2) { + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) + return -ENOENT; + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) + /* Part of a range */ + return -EINVAL; + child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.debug.dvc2 = 0; +#endif + 
child->thread.debug.dac2 = 0; + dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); + } else + return -EINVAL; + + return 0; +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE +static int set_dac_range(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; + + /* We don't allow range watchpoints to be used with DVC */ + if (bp_info->condition_mode) + return -EINVAL; + + /* + * Best effort to verify the address range. The user/supervisor bits + * prevent trapping in kernel space, but let's fail on an obvious bad + * range. The simple test on the mask is not fool-proof, and any + * exclusive range will spill over into kernel space. + */ + if (bp_info->addr >= TASK_SIZE) + return -EIO; + if (mode == PPC_BREAKPOINT_MODE_MASK) { + /* + * dac2 is a bitmask. Don't allow a mask that makes a + * kernel space address from a valid dac1 value + */ + if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) + return -EIO; + } else { + /* + * For range breakpoints, addr2 must also be a valid address + */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + } + + if (child->thread.debug.dbcr0 & + (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) + return -ENOSPC; + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.debug.dac1 = bp_info->addr; + child->thread.debug.dac2 = bp_info->addr2; + if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + child->thread.debug.dbcr2 |= DBCR2_DAC12M; + else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + child->thread.debug.dbcr2 |= DBCR2_DAC12MX; + else /* PPC_BREAKPOINT_MODE_MASK */ + child->thread.debug.dbcr2 |= DBCR2_DAC12MM; + child->thread.regs->msr |= MSR_DE; + + return 5; +} +#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ + +static long 
ppc_set_hwdebug(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int len = 0; + struct thread_struct *thread = &(child->thread); + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + struct arch_hw_breakpoint brk; +#endif + /* Only version 1 of struct ppc_hw_breakpoint is accepted. */ + if (bp_info->version != 1) + return -ENOTSUPP; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * Check for invalid flags and combinations + */ + if ((bp_info->trigger_type == 0) || + (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | + PPC_BREAKPOINT_TRIGGER_RW)) || + (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || + (bp_info->condition_mode & + ~(PPC_BREAKPOINT_CONDITION_MODE | + PPC_BREAKPOINT_CONDITION_BE_ALL))) + return -EINVAL; +#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 + if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; +#endif + /* Execute triggers may not be combined with read/write or a condition. */ + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { + if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) || + (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + return -EINVAL; + return set_instruction_bp(child, bp_info); + } + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + return set_dac(child, bp_info); + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + return set_dac_range(child, bp_info); +#else + return -EINVAL; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + /* + * We only support one data breakpoint + */ + if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || + (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || + bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; + + if ((unsigned long)bp_info->addr >= TASK_SIZE) + return -EIO; + + brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; + brk.type = HW_BRK_TYPE_TRANSLATE; + brk.len = DABR_MAX_LEN; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + brk.type |= HW_BRK_TYPE_READ; + if (bp_info->trigger_type &
PPC_BREAKPOINT_TRIGGER_WRITE) + brk.type |= HW_BRK_TYPE_WRITE; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + len = bp_info->addr2 - bp_info->addr; + else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + len = 1; + else + return -EINVAL; + bp = thread->ptrace_bps[0]; + if (bp) + return -ENOSPC; + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = (unsigned long)bp_info->addr; + attr.bp_len = len; + arch_bp_generic_fields(brk.type, &attr.bp_type); + + thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, + ptrace_triggered, NULL, child); + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + return PTR_ERR(bp); + } + + return 1; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + /* With CONFIG_HAVE_HW_BREAKPOINT the return above always runs first, so the rest is in effect the !CONFIG_HAVE_HW_BREAKPOINT path. */ + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) + return -EINVAL; + + if (child->thread.hw_brk.address) + return -ENOSPC; + + if (!ppc_breakpoint_available()) + return -ENODEV; + + child->thread.hw_brk = brk; + + return 1; +#endif /* !CONFIG_PPC_ADV_DEBUG_REGS */ +} +/* + * ppc_del_hwdebug() - remove the hardware breakpoint identified by @data. + * + * With CONFIG_PPC_ADV_DEBUG_REGS, values up to 4 are passed to + * del_instruction_bp() and larger ones to del_dac() as (@data - 4); on + * success DBCR0_IDM and MSR_DE are cleared once DBCR_ACTIVE_EVENTS() + * reports nothing left. Otherwise @data must be 1 (else -EINVAL) and + * -ENOENT is returned when no breakpoint is installed. + */ +static long ppc_del_hwdebug(struct task_struct *child, long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret = 0; + struct thread_struct *thread = &(child->thread); + struct perf_event *bp; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + int rc; + + if (data <= 4) + rc = del_instruction_bp(child, (int)data); + else + rc = del_dac(child, (int)data - 4); + + if (!rc) { + if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, + child->thread.debug.dbcr1)) { + child->thread.debug.dbcr0 &= ~DBCR0_IDM; + child->thread.regs->msr &= ~MSR_DE; + } + } + return rc; +#else + if (data != 1) + return -EINVAL; + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + bp = thread->ptrace_bps[0]; + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } else + ret = -ENOENT; + return ret; +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + if (child->thread.hw_brk.address == 0) + return -ENOENT;
+ + child->thread.hw_brk.address = 0; + child->thread.hw_brk.type = 0; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + return 0; +#endif +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret = -EPERM; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; + + switch (request) { + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long index, tmp; + + ret = -EIO; + /* convert to index and check */ +#ifdef CONFIG_PPC32 + index = addr >> 2; + if ((addr & 3) || (index > PT_FPSCR) + || (child->thread.regs == NULL)) +#else + index = addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) +#endif + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_get_reg(child, (int) index, &tmp); + if (ret) + break; + } else { + unsigned int fpidx = index - PT_FPR0; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&tmp, &child->thread.TS_FPR(fpidx), + sizeof(long)); + else + tmp = child->thread.fp_state.fpscr; + } + ret = put_user(tmp, datalp); + break; + } + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: { + unsigned long index; + + ret = -EIO; + /* convert to index and check */ +#ifdef CONFIG_PPC32 + index = addr >> 2; + if ((addr & 3) || (index > PT_FPSCR) + || (child->thread.regs == NULL)) +#else + index = addr >> 3; + if ((addr & 7) || (index > PT_FPSCR)) +#endif + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_put_reg(child, index, data); + } else { + unsigned int fpidx = index - PT_FPR0; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&child->thread.TS_FPR(fpidx), &data, + sizeof(long)); + else + child->thread.fp_state.fpscr = data; + ret = 0; + } + break; + } + + case PPC_PTRACE_GETHWDBGINFO: { + struct ppc_debug_info dbginfo; + + dbginfo.version = 1; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + 
dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; + dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; + dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; + dbginfo.data_bp_alignment = 4; + dbginfo.sizeof_condition = 4; + dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | + PPC_DEBUG_FEATURE_INSN_BP_MASK; +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + dbginfo.features |= + PPC_DEBUG_FEATURE_DATA_BP_RANGE | + PPC_DEBUG_FEATURE_DATA_BP_MASK; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + dbginfo.num_instruction_bps = 0; + if (ppc_breakpoint_available()) + dbginfo.num_data_bps = 1; + else + dbginfo.num_data_bps = 0; + dbginfo.num_condition_regs = 0; +#ifdef CONFIG_PPC64 + dbginfo.data_bp_alignment = 8; +#else + dbginfo.data_bp_alignment = 4; +#endif + dbginfo.sizeof_condition = 0; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; + if (dawr_enabled()) + dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; +#else + dbginfo.features = 0; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + + if (copy_to_user(datavp, &dbginfo, + sizeof(struct ppc_debug_info))) + return -EFAULT; + return 0; + } + + case PPC_PTRACE_SETHWDEBUG: { + struct ppc_hw_breakpoint bp_info; + + if (copy_from_user(&bp_info, datavp, + sizeof(struct ppc_hw_breakpoint))) + return -EFAULT; + return ppc_set_hwdebug(child, &bp_info); + } + + case PPC_PTRACE_DELHWDEBUG: { + ret = ppc_del_hwdebug(child, data); + break; + } + + case PTRACE_GET_DEBUGREG: { +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dabr_fake; +#endif + ret = -EINVAL; + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.debug.dac1, datalp); +#else + dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + ret = put_user(dabr_fake, datalp); +#endif + break; + } + + case PTRACE_SET_DEBUGREG: + ret = 
ptrace_set_debugreg(child, addr, data); + break; + +#ifdef CONFIG_PPC64 + case PTRACE_GETREGS64: +#endif + case PTRACE_GETREGS: /* Get all pt_regs from the child. */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_GPR, + 0, sizeof(struct user_pt_regs), + datavp); + +#ifdef CONFIG_PPC64 + case PTRACE_SETREGS64: +#endif + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_GPR, + 0, sizeof(struct user_pt_regs), + datavp); + + case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_FPR, + 0, sizeof(elf_fpregset_t), + datavp); + + case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_FPR, + 0, sizeof(elf_fpregset_t), + datavp); + +#ifdef CONFIG_ALTIVEC + case PTRACE_GETVRREGS: + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_VMX, + 0, (33 * sizeof(vector128) + + sizeof(u32)), + datavp); + + case PTRACE_SETVRREGS: + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_VMX, + 0, (33 * sizeof(vector128) + + sizeof(u32)), + datavp); +#endif +#ifdef CONFIG_VSX + case PTRACE_GETVSRREGS: + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_VSX, + 0, 32 * sizeof(double), + datavp); + + case PTRACE_SETVSRREGS: + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_VSX, + 0, 32 * sizeof(double), + datavp); +#endif +#ifdef CONFIG_SPE + case PTRACE_GETEVRREGS: + /* Get the child spe register state. */ + return copy_regset_to_user(child, &user_ppc_native_view, + REGSET_SPE, 0, 35 * sizeof(u32), + datavp); + + case PTRACE_SETEVRREGS: + /* Set the child spe register state. 
*/ + return copy_regset_from_user(child, &user_ppc_native_view, + REGSET_SPE, 0, 35 * sizeof(u32), + datavp); +#endif + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + return ret; +} + +#ifdef CONFIG_SECCOMP +static int do_seccomp(struct pt_regs *regs) +{ + if (!test_thread_flag(TIF_SECCOMP)) + return 0; + + /* + * The ABI we present to seccomp tracers is that r3 contains + * the syscall return value and orig_gpr3 contains the first + * syscall parameter. This is different to the ptrace ABI where + * both r3 and orig_gpr3 contain the first syscall parameter. + */ + regs->gpr[3] = -ENOSYS; + + /* + * We use the __ version here because we have already checked + * TIF_SECCOMP. If this fails, there is nothing left to do, we + * have already loaded -ENOSYS into r3, or seccomp has put + * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). + */ + if (__secure_computing(NULL)) + return -1; + + /* + * The syscall was allowed by seccomp, restore the register + * state to what audit expects. + * Note that we use orig_gpr3, which means a seccomp tracer can + * modify the first syscall parameter (in orig_gpr3) and also + * allow the syscall to proceed. + */ + regs->gpr[3] = regs->orig_gpr3; + + return 0; +} +#else +static inline int do_seccomp(struct pt_regs *regs) { return 0; } +#endif /* CONFIG_SECCOMP */ + +/** + * do_syscall_trace_enter() - Do syscall tracing on kernel entry. + * @regs: the pt_regs of the task to trace (current) + * + * Performs various types of tracing on syscall entry. This includes seccomp, + * ptrace, syscall tracepoints and audit. + * + * The pt_regs are potentially visible to userspace via ptrace, so their + * contents is ABI. + * + * One or more of the tracers may modify the contents of pt_regs, in particular + * to modify arguments or even the syscall number itself. + * + * It's also possible that a tracer can choose to reject the system call. 
In + * that case this function will return an illegal syscall number, and will put + * an appropriate return value in regs->r3. + * + * Return: the (possibly changed) syscall number. + */ +long do_syscall_trace_enter(struct pt_regs *regs) +{ + u32 flags; + + user_exit(); + + flags = READ_ONCE(current_thread_info()->flags) & + (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); + + if (flags) { + int rc = tracehook_report_syscall_entry(regs); + + if (unlikely(flags & _TIF_SYSCALL_EMU)) { + /* + * A nonzero return code from + * tracehook_report_syscall_entry() tells us to prevent + * the syscall execution, but we are not going to + * execute it anyway. + * + * Returning -1 will skip the syscall execution. We want + * to avoid clobbering any registers, so we don't goto + * the skip label below. + */ + return -1; + } + + if (rc) { + /* + * The tracer decided to abort the syscall. Note that + * the tracer may also just change regs->gpr[0] to an + * invalid syscall number, that is handled below on the + * exit path. + */ + goto skip; + } + } + + /* Run seccomp after ptrace; allow it to set gpr[3]. */ + if (do_seccomp(regs)) + return -1; + + /* Avoid trace and audit when syscall is invalid. */ + if (regs->gpr[0] >= NR_syscalls) + goto skip; + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gpr[0]); + +#ifdef CONFIG_PPC64 + if (!is_32bit_task()) + audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], + regs->gpr[5], regs->gpr[6]); + else +#endif + audit_syscall_entry(regs->gpr[0], + regs->gpr[3] & 0xffffffff, + regs->gpr[4] & 0xffffffff, + regs->gpr[5] & 0xffffffff, + regs->gpr[6] & 0xffffffff); + + /* Return the possibly modified but valid syscall number */ + return regs->gpr[0]; + +skip: + /* + * If we are aborting explicitly, or if the syscall number is + * now invalid, set the return value to -ENOSYS. 
+ */ + regs->gpr[3] = -ENOSYS; + return -1; +} + +void do_syscall_trace_leave(struct pt_regs *regs) +{ + int step; + + audit_syscall_exit(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->result); + + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); + + user_enter(); +} + +void __init pt_regs_check(void); + +/* + * Dummy function, its purpose is to break the build if struct pt_regs and + * struct user_pt_regs don't match. + */ +void __init pt_regs_check(void) +{ + BUILD_BUG_ON(offsetof(struct pt_regs, gpr) != + offsetof(struct user_pt_regs, gpr)); + BUILD_BUG_ON(offsetof(struct pt_regs, nip) != + offsetof(struct user_pt_regs, nip)); + BUILD_BUG_ON(offsetof(struct pt_regs, msr) != + offsetof(struct user_pt_regs, msr)); + BUILD_BUG_ON(offsetof(struct pt_regs, msr) != + offsetof(struct user_pt_regs, msr)); + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct user_pt_regs, orig_gpr3)); + BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != + offsetof(struct user_pt_regs, ctr)); + BUILD_BUG_ON(offsetof(struct pt_regs, link) != + offsetof(struct user_pt_regs, link)); + BUILD_BUG_ON(offsetof(struct pt_regs, xer) != + offsetof(struct user_pt_regs, xer)); + BUILD_BUG_ON(offsetof(struct pt_regs, ccr) != + offsetof(struct user_pt_regs, ccr)); +#ifdef __powerpc64__ + BUILD_BUG_ON(offsetof(struct pt_regs, softe) != + offsetof(struct user_pt_regs, softe)); +#else + BUILD_BUG_ON(offsetof(struct pt_regs, mq) != + offsetof(struct user_pt_regs, mq)); +#endif + BUILD_BUG_ON(offsetof(struct pt_regs, trap) != + offsetof(struct user_pt_regs, trap)); + BUILD_BUG_ON(offsetof(struct pt_regs, dar) != + offsetof(struct user_pt_regs, dar)); + BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != + offsetof(struct user_pt_regs, dsisr)); + BUILD_BUG_ON(offsetof(struct pt_regs, result) != + offsetof(struct user_pt_regs, result)); + + 
BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); + + // Now check that the pt_regs offsets match the uapi #defines + #define CHECK_REG(_pt, _reg) \ + BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \ + sizeof(unsigned long))); + + CHECK_REG(PT_R0, gpr[0]); + CHECK_REG(PT_R1, gpr[1]); + CHECK_REG(PT_R2, gpr[2]); + CHECK_REG(PT_R3, gpr[3]); + CHECK_REG(PT_R4, gpr[4]); + CHECK_REG(PT_R5, gpr[5]); + CHECK_REG(PT_R6, gpr[6]); + CHECK_REG(PT_R7, gpr[7]); + CHECK_REG(PT_R8, gpr[8]); + CHECK_REG(PT_R9, gpr[9]); + CHECK_REG(PT_R10, gpr[10]); + CHECK_REG(PT_R11, gpr[11]); + CHECK_REG(PT_R12, gpr[12]); + CHECK_REG(PT_R13, gpr[13]); + CHECK_REG(PT_R14, gpr[14]); + CHECK_REG(PT_R15, gpr[15]); + CHECK_REG(PT_R16, gpr[16]); + CHECK_REG(PT_R17, gpr[17]); + CHECK_REG(PT_R18, gpr[18]); + CHECK_REG(PT_R19, gpr[19]); + CHECK_REG(PT_R20, gpr[20]); + CHECK_REG(PT_R21, gpr[21]); + CHECK_REG(PT_R22, gpr[22]); + CHECK_REG(PT_R23, gpr[23]); + CHECK_REG(PT_R24, gpr[24]); + CHECK_REG(PT_R25, gpr[25]); + CHECK_REG(PT_R26, gpr[26]); + CHECK_REG(PT_R27, gpr[27]); + CHECK_REG(PT_R28, gpr[28]); + CHECK_REG(PT_R29, gpr[29]); + CHECK_REG(PT_R30, gpr[30]); + CHECK_REG(PT_R31, gpr[31]); + CHECK_REG(PT_NIP, nip); + CHECK_REG(PT_MSR, msr); + CHECK_REG(PT_ORIG_R3, orig_gpr3); + CHECK_REG(PT_CTR, ctr); + CHECK_REG(PT_LNK, link); + CHECK_REG(PT_XER, xer); + CHECK_REG(PT_CCR, ccr); +#ifdef CONFIG_PPC64 + CHECK_REG(PT_SOFTE, softe); +#else + CHECK_REG(PT_MQ, mq); +#endif + CHECK_REG(PT_TRAP, trap); + CHECK_REG(PT_DAR, dar); + CHECK_REG(PT_DSISR, dsisr); + CHECK_REG(PT_RESULT, result); + #undef CHECK_REG + + BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long)); + + /* + * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the + * real registers. 
+ */ + BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); +} diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c new file mode 100644 index 000000000000..f37eb53de1a1 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -0,0 +1,318 @@ +/* + * ptrace for 32-bit processes running on a 64-bit kernel. + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/m68k/kernel/ptrace.c" + * Copyright (C) 1994 by Hamish Macdonald + * Taken from linux/kernel/ptrace.c and modified for M680x0. + * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds + * + * Modified by Cort Dougan (cort@hq.fsmlabs.com) + * and Paul Mackerras (paulus@samba.org). + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. 
+ */ + +/* Macros to workout the correct index for the FPR in the thread struct */ +#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1) +#define FPRHALF(i) (((i) - PT_FPR0) & 1) +#define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i) + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + int ret; + + switch (request) { + /* + * Read 4 bytes of the other process' storage + * data is a pointer specifying where the user wants the + * 4 bytes copied into + * addr is a pointer in the user's storage that contains an 8 byte + * address in the other process of the 4 bytes that is to be read + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. + */ + case PPC_PTRACE_PEEKTEXT_3264: + case PPC_PTRACE_PEEKDATA_3264: { + u32 tmp; + int copied; + u32 __user * addrOthers; + + ret = -EIO; + + /* Get the addr in the other process that we want to read */ + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), FOLL_FORCE); + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (u32 __user *)data); + break; + } + + /* Read a register (specified by ADDR) out of the "user area" */ + case PTRACE_PEEKUSR: { + int index; + unsigned long tmp; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 2; + if ((addr & 3) || (index > PT_FPSCR32)) + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_get_reg(child, index, &tmp); + if (ret) + break; + } else { + flush_fp_to_thread(child); + /* + * the user space code considers the floating point + * to be an array of unsigned int (32 bits) - the + * index passed in is based on this assumption. 
+ */ + tmp = ((unsigned int *)child->thread.fp_state.fpr) + [FPRINDEX(index)]; + } + ret = put_user((unsigned int)tmp, (u32 __user *)data); + break; + } + + /* + * Read 4 bytes out of the other process' pt_regs area + * data is a pointer specifying where the user wants the + * 4 bytes copied into + * addr is the offset into the other process' pt_regs structure + * that is to be read + * (this is run in a 32-bit process looking at a 64-bit process) + */ + case PPC_PTRACE_PEEKUSR_3264: { + u32 index; + u32 reg32bits; + u64 tmp; + u32 numReg; + u32 part; + + ret = -EIO; + /* Determine which register the user wants */ + index = (u64)addr >> 2; + numReg = index / 2; + /* Determine which part of the register the user wants */ + if (index % 2) + part = 1; /* want the 2nd half of the register (right-most). */ + else + part = 0; /* want the 1st half of the register (left-most). */ + + /* Validate the input - check to see if address is on the wrong boundary + * or beyond the end of the user area + */ + if ((addr & 3) || numReg > PT_FPSCR) + break; + + CHECK_FULL_REGS(child->thread.regs); + if (numReg >= PT_FPR0) { + flush_fp_to_thread(child); + /* get 64 bit FPR */ + tmp = child->thread.fp_state.fpr[numReg - PT_FPR0][0]; + } else { /* register within PT_REGS struct */ + unsigned long tmp2; + ret = ptrace_get_reg(child, numReg, &tmp2); + if (ret) + break; + tmp = tmp2; + } + reg32bits = ((u32*)&tmp)[part]; + ret = put_user(reg32bits, (u32 __user *)data); + break; + } + + /* + * Write 4 bytes into the other process' storage + * data is the 4 bytes that the user wants written + * addr is a pointer in the user's storage that contains an + * 8 byte address in the other process where the 4 bytes + * that is to be written + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. 
+ */ + case PPC_PTRACE_POKETEXT_3264: + case PPC_PTRACE_POKEDATA_3264: { + u32 tmp = data; + u32 __user * addrOthers; + + /* Get the addr in the other process that we want to write into */ + ret = -EIO; + if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) + break; + ret = 0; + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), + FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) + break; + ret = -EIO; + break; + } + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: { + unsigned long index; + + ret = -EIO; + /* convert to index and check */ + index = (unsigned long) addr >> 2; + if ((addr & 3) || (index > PT_FPSCR32)) + break; + + CHECK_FULL_REGS(child->thread.regs); + if (index < PT_FPR0) { + ret = ptrace_put_reg(child, index, data); + } else { + flush_fp_to_thread(child); + /* + * the user space code considers the floating point + * to be an array of unsigned int (32 bits) - the + * index passed in is based on this assumption. + */ + ((unsigned int *)child->thread.fp_state.fpr) + [FPRINDEX(index)] = data; + ret = 0; + } + break; + } + + /* + * Write 4 bytes into the other process' pt_regs area + * data is the 4 bytes that the user wants written + * addr is the offset into the other process' pt_regs structure + * that is to be written into + * (this is run in a 32-bit process looking at a 64-bit process) + */ + case PPC_PTRACE_POKEUSR_3264: { + u32 index; + u32 numReg; + + ret = -EIO; + /* Determine which register the user wants */ + index = (u64)addr >> 2; + numReg = index / 2; + + /* + * Validate the input - check to see if address is on the + * wrong boundary or beyond the end of the user area + */ + if ((addr & 3) || (numReg > PT_FPSCR)) + break; + CHECK_FULL_REGS(child->thread.regs); + if (numReg < PT_FPR0) { + unsigned long freg; + ret = ptrace_get_reg(child, numReg, &freg); + if (ret) + break; + if (index % 2) + freg = (freg & ~0xfffffffful) | (data & 0xfffffffful); + else + freg = (freg & 0xfffffffful) | (data << 
32); + ret = ptrace_put_reg(child, numReg, freg); + } else { + u64 *tmp; + flush_fp_to_thread(child); + /* get 64 bit FPR ... */ + tmp = &child->thread.fp_state.fpr[numReg - PT_FPR0][0]; + /* ... write the 32 bit part we want */ + ((u32 *)tmp)[index % 2] = data; + ret = 0; + } + break; + } + + case PTRACE_GET_DEBUGREG: { +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dabr_fake; +#endif + ret = -EINVAL; + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.debug.dac1, (u32 __user *)data); +#else + dabr_fake = ( + (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + ret = put_user(dabr_fake, (u32 __user *)data); +#endif + break; + } + + case PTRACE_GETREGS: /* Get all pt_regs from the child. */ + return copy_regset_to_user( + child, task_user_regset_view(current), 0, + 0, PT_REGS_COUNT * sizeof(compat_long_t), + compat_ptr(data)); + + case PTRACE_SETREGS: /* Set all gp regs in the child. 
*/ + return copy_regset_from_user( + child, task_user_regset_view(current), 0, + 0, PT_REGS_COUNT * sizeof(compat_long_t), + compat_ptr(data)); + + case PTRACE_GETFPREGS: + case PTRACE_SETFPREGS: + case PTRACE_GETVRREGS: + case PTRACE_SETVRREGS: + case PTRACE_GETVSRREGS: + case PTRACE_SETVSRREGS: + case PTRACE_GETREGS64: + case PTRACE_SETREGS64: + case PTRACE_KILL: + case PTRACE_SINGLESTEP: + case PTRACE_DETACH: + case PTRACE_SET_DEBUGREG: + case PTRACE_SYSCALL: + case PTRACE_CONT: + case PPC_PTRACE_GETHWDBGINFO: + case PPC_PTRACE_SETHWDEBUG: + case PPC_PTRACE_DELHWDEBUG: + ret = arch_ptrace(child, request, addr, data); + break; + + default: + ret = compat_ptrace_request(child, request, addr, data); + break; + } + + return ret; +} diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c deleted file mode 100644 index f37eb53de1a1..000000000000 --- a/arch/powerpc/kernel/ptrace32.c +++ /dev/null @@ -1,318 +0,0 @@ -/* - * ptrace for 32-bit processes running on a 64-bit kernel. - * - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Derived from "arch/m68k/kernel/ptrace.c" - * Copyright (C) 1994 by Hamish Macdonald - * Taken from linux/kernel/ptrace.c and modified for M680x0. - * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds - * - * Modified by Cort Dougan (cort@hq.fsmlabs.com) - * and Paul Mackerras (paulus@samba.org). - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file COPYING in the main directory of - * this archive for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. 
- */ - -/* Macros to workout the correct index for the FPR in the thread struct */ -#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1) -#define FPRHALF(i) (((i) - PT_FPR0) & 1) -#define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i) - -long compat_arch_ptrace(struct task_struct *child, compat_long_t request, - compat_ulong_t caddr, compat_ulong_t cdata) -{ - unsigned long addr = caddr; - unsigned long data = cdata; - int ret; - - switch (request) { - /* - * Read 4 bytes of the other process' storage - * data is a pointer specifying where the user wants the - * 4 bytes copied into - * addr is a pointer in the user's storage that contains an 8 byte - * address in the other process of the 4 bytes that is to be read - * (this is run in a 32-bit process looking at a 64-bit process) - * when I and D space are separate, these will need to be fixed. - */ - case PPC_PTRACE_PEEKTEXT_3264: - case PPC_PTRACE_PEEKDATA_3264: { - u32 tmp; - int copied; - u32 __user * addrOthers; - - ret = -EIO; - - /* Get the addr in the other process that we want to read */ - if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) - break; - - copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), FOLL_FORCE); - if (copied != sizeof(tmp)) - break; - ret = put_user(tmp, (u32 __user *)data); - break; - } - - /* Read a register (specified by ADDR) out of the "user area" */ - case PTRACE_PEEKUSR: { - int index; - unsigned long tmp; - - ret = -EIO; - /* convert to index and check */ - index = (unsigned long) addr >> 2; - if ((addr & 3) || (index > PT_FPSCR32)) - break; - - CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { - ret = ptrace_get_reg(child, index, &tmp); - if (ret) - break; - } else { - flush_fp_to_thread(child); - /* - * the user space code considers the floating point - * to be an array of unsigned int (32 bits) - the - * index passed in is based on this assumption. 
- */ - tmp = ((unsigned int *)child->thread.fp_state.fpr) - [FPRINDEX(index)]; - } - ret = put_user((unsigned int)tmp, (u32 __user *)data); - break; - } - - /* - * Read 4 bytes out of the other process' pt_regs area - * data is a pointer specifying where the user wants the - * 4 bytes copied into - * addr is the offset into the other process' pt_regs structure - * that is to be read - * (this is run in a 32-bit process looking at a 64-bit process) - */ - case PPC_PTRACE_PEEKUSR_3264: { - u32 index; - u32 reg32bits; - u64 tmp; - u32 numReg; - u32 part; - - ret = -EIO; - /* Determine which register the user wants */ - index = (u64)addr >> 2; - numReg = index / 2; - /* Determine which part of the register the user wants */ - if (index % 2) - part = 1; /* want the 2nd half of the register (right-most). */ - else - part = 0; /* want the 1st half of the register (left-most). */ - - /* Validate the input - check to see if address is on the wrong boundary - * or beyond the end of the user area - */ - if ((addr & 3) || numReg > PT_FPSCR) - break; - - CHECK_FULL_REGS(child->thread.regs); - if (numReg >= PT_FPR0) { - flush_fp_to_thread(child); - /* get 64 bit FPR */ - tmp = child->thread.fp_state.fpr[numReg - PT_FPR0][0]; - } else { /* register within PT_REGS struct */ - unsigned long tmp2; - ret = ptrace_get_reg(child, numReg, &tmp2); - if (ret) - break; - tmp = tmp2; - } - reg32bits = ((u32*)&tmp)[part]; - ret = put_user(reg32bits, (u32 __user *)data); - break; - } - - /* - * Write 4 bytes into the other process' storage - * data is the 4 bytes that the user wants written - * addr is a pointer in the user's storage that contains an - * 8 byte address in the other process where the 4 bytes - * that is to be written - * (this is run in a 32-bit process looking at a 64-bit process) - * when I and D space are separate, these will need to be fixed. 
- */ - case PPC_PTRACE_POKETEXT_3264: - case PPC_PTRACE_POKEDATA_3264: { - u32 tmp = data; - u32 __user * addrOthers; - - /* Get the addr in the other process that we want to write into */ - ret = -EIO; - if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) - break; - ret = 0; - if (ptrace_access_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), - FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) - break; - ret = -EIO; - break; - } - - /* write the word at location addr in the USER area */ - case PTRACE_POKEUSR: { - unsigned long index; - - ret = -EIO; - /* convert to index and check */ - index = (unsigned long) addr >> 2; - if ((addr & 3) || (index > PT_FPSCR32)) - break; - - CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { - ret = ptrace_put_reg(child, index, data); - } else { - flush_fp_to_thread(child); - /* - * the user space code considers the floating point - * to be an array of unsigned int (32 bits) - the - * index passed in is based on this assumption. - */ - ((unsigned int *)child->thread.fp_state.fpr) - [FPRINDEX(index)] = data; - ret = 0; - } - break; - } - - /* - * Write 4 bytes into the other process' pt_regs area - * data is the 4 bytes that the user wants written - * addr is the offset into the other process' pt_regs structure - * that is to be written into - * (this is run in a 32-bit process looking at a 64-bit process) - */ - case PPC_PTRACE_POKEUSR_3264: { - u32 index; - u32 numReg; - - ret = -EIO; - /* Determine which register the user wants */ - index = (u64)addr >> 2; - numReg = index / 2; - - /* - * Validate the input - check to see if address is on the - * wrong boundary or beyond the end of the user area - */ - if ((addr & 3) || (numReg > PT_FPSCR)) - break; - CHECK_FULL_REGS(child->thread.regs); - if (numReg < PT_FPR0) { - unsigned long freg; - ret = ptrace_get_reg(child, numReg, &freg); - if (ret) - break; - if (index % 2) - freg = (freg & ~0xfffffffful) | (data & 0xfffffffful); - else - freg = (freg & 0xfffffffful) | (data << 
32); - ret = ptrace_put_reg(child, numReg, freg); - } else { - u64 *tmp; - flush_fp_to_thread(child); - /* get 64 bit FPR ... */ - tmp = &child->thread.fp_state.fpr[numReg - PT_FPR0][0]; - /* ... write the 32 bit part we want */ - ((u32 *)tmp)[index % 2] = data; - ret = 0; - } - break; - } - - case PTRACE_GET_DEBUGREG: { -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long dabr_fake; -#endif - ret = -EINVAL; - /* We only support one DABR and no IABRS at the moment */ - if (addr > 0) - break; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.debug.dac1, (u32 __user *)data); -#else - dabr_fake = ( - (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); - ret = put_user(dabr_fake, (u32 __user *)data); -#endif - break; - } - - case PTRACE_GETREGS: /* Get all pt_regs from the child. */ - return copy_regset_to_user( - child, task_user_regset_view(current), 0, - 0, PT_REGS_COUNT * sizeof(compat_long_t), - compat_ptr(data)); - - case PTRACE_SETREGS: /* Set all gp regs in the child. 
*/ - return copy_regset_from_user( - child, task_user_regset_view(current), 0, - 0, PT_REGS_COUNT * sizeof(compat_long_t), - compat_ptr(data)); - - case PTRACE_GETFPREGS: - case PTRACE_SETFPREGS: - case PTRACE_GETVRREGS: - case PTRACE_SETVRREGS: - case PTRACE_GETVSRREGS: - case PTRACE_SETVSRREGS: - case PTRACE_GETREGS64: - case PTRACE_SETREGS64: - case PTRACE_KILL: - case PTRACE_SINGLESTEP: - case PTRACE_DETACH: - case PTRACE_SET_DEBUGREG: - case PTRACE_SYSCALL: - case PTRACE_CONT: - case PPC_PTRACE_GETHWDBGINFO: - case PPC_PTRACE_SETHWDEBUG: - case PPC_PTRACE_DELHWDEBUG: - ret = arch_ptrace(child, request, addr, data); - break; - - default: - ret = compat_ptrace_request(child, request, addr, data); - break; - } - - return ret; -} -- cgit v1.2.3-59-g8ed1b From b3138536c837f81179c5a902561043fbf7ee0333 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:38 +0000 Subject: powerpc/ptrace: remove unused header includes Remove unused header includes in ptrace.c and ptrace32.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6276df0be87a4329c2bb46b3b0f02059ae9e70e6.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/ptrace.c | 19 ++----------------- arch/powerpc/kernel/ptrace/ptrace32.c | 11 ----------- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 25c0424e8868..7ed54dbb2d7e 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -15,35 +15,20 @@ * this archive for more details. 
*/ -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include #include -#include #include -#include #include #include - -#include +#include #include -#include -#include + #include #include #include #include -#include #define CREATE_TRACE_POINTS #include diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index f37eb53de1a1..7976ddf29c0e 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -17,21 +17,10 @@ * this archive for more details. */ -#include -#include -#include -#include -#include #include #include -#include -#include -#include #include -#include -#include -#include #include /* -- cgit v1.2.3-59-g8ed1b From f1763e623c69bcec2c1e739e990058de41d45030 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:39 +0000 Subject: powerpc/ptrace: drop unnecessary #ifdefs CONFIG_PPC64 Drop a bunch of #ifdefs CONFIG_PPC64 that are not vital. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/af38b87a7e1e3efe4f9b664eaeb029e6e7d69fdb.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/ptrace.h | 2 ++ arch/powerpc/kernel/ptrace/ptrace.c | 18 +++--------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 082a40153b94..e0195e6b892b 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -279,6 +279,8 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, #endif /* __ASSEMBLY__ */ #ifndef __powerpc64__ +/* We need PT_SOFTE defined at all time to avoid #ifdefs */ +#define PT_SOFTE PT_MQ #else /* __powerpc64__ */ #define PT_FPSCR32 (PT_FPR0 + 2*32 + 1) /* each FP reg occupies 2 32-bit userspace slots */ #define PT_VR0_32 164 /* each Vector reg occupies 4 slots in 32-bit */ diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 7ed54dbb2d7e..3dd94c296ac7 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -274,17 +274,15 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) if (regno == PT_DSCR) return get_user_dscr(task, data); -#ifdef CONFIG_PPC64 /* * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is * no more used as a flag, lets force usr to alway see the softe value as 1 * which means interrupts are not soft disabled. 
*/ - if (regno == PT_SOFTE) { + if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) { *data = 1; return 0; } -#endif regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long); if (regno < regs_max) { @@ -1998,7 +1996,6 @@ static const struct user_regset_view user_ppc_native_view = { .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) }; -#ifdef CONFIG_PPC64 #include static int gpr32_get_common(struct task_struct *target, @@ -2272,14 +2269,11 @@ static const struct user_regset_view user_ppc_compat_view = { .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) }; -#endif /* CONFIG_PPC64 */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_PPC64 - if (test_tsk_thread_flag(task, TIF_32BIT)) + if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT)) return &user_ppc_compat_view; -#endif return &user_ppc_native_view; } @@ -3063,11 +3057,7 @@ long arch_ptrace(struct task_struct *child, long request, else dbginfo.num_data_bps = 0; dbginfo.num_condition_regs = 0; -#ifdef CONFIG_PPC64 - dbginfo.data_bp_alignment = 8; -#else - dbginfo.data_bp_alignment = 4; -#endif + dbginfo.data_bp_alignment = sizeof(long); dbginfo.sizeof_condition = 0; #ifdef CONFIG_HAVE_HW_BREAKPOINT dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; @@ -3304,12 +3294,10 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gpr[0]); -#ifdef CONFIG_PPC64 if (!is_32bit_task()) audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], regs->gpr[5], regs->gpr[6]); else -#endif audit_syscall_entry(regs->gpr[0], regs->gpr[3] & 0xffffffff, regs->gpr[4] & 0xffffffff, -- cgit v1.2.3-59-g8ed1b From 963ae6b2ff1c202f2dbbe042987233a0e21d7249 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:40 +0000 Subject: powerpc/ptrace: drop PARAMETER_SAVE_AREA_OFFSET 
PARAMETER_SAVE_AREA_OFFSET is not used, drop it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6dac2b49207647f75cbf0e6771a545e691f0fd93.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/ptrace.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 3dd94c296ac7..22826c942eae 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -33,16 +33,6 @@ #define CREATE_TRACE_POINTS #include -/* - * The parameter save area on the stack is used to store arguments being passed - * to callee function and is located at fixed offset from stack pointer. - */ -#ifdef CONFIG_PPC32 -#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ -#else /* CONFIG_PPC32 */ -#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ -#endif - struct pt_regs_offset { const char *name; int offset; -- cgit v1.2.3-59-g8ed1b From 7b99ed4e8e3acd8eb9a8bcc71b9b9273a573bdbf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:41 +0000 Subject: powerpc/ptrace: split out VSX related functions. 
Move CONFIG_VSX functions out of ptrace.c, into ptrace-vsx.c and ptrace-novsx.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/dc8e20c8c95b7e83add0c6dd48f9470628896c5c.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/Makefile | 4 + arch/powerpc/kernel/ptrace/ptrace-decl.h | 26 +++++ arch/powerpc/kernel/ptrace/ptrace-novsx.c | 57 ++++++++++ arch/powerpc/kernel/ptrace/ptrace-vsx.c | 151 ++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 175 +----------------------------- 5 files changed, 241 insertions(+), 172 deletions(-) create mode 100644 arch/powerpc/kernel/ptrace/ptrace-decl.h create mode 100644 arch/powerpc/kernel/ptrace/ptrace-novsx.c create mode 100644 arch/powerpc/kernel/ptrace/ptrace-vsx.c diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 02fb28eb3b55..238c27189078 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -7,3 +7,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o obj-$(CONFIG_PPC64) += ptrace32.o +obj-$(CONFIG_VSX) += ptrace-vsx.o +ifneq ($(CONFIG_VSX),y) +obj-y += ptrace-novsx.o +endif diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h new file mode 100644 index 000000000000..764df4ee9362 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* ptrace-(no)vsx */ + +int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/* ptrace-vsx */ + +int vsr_active(struct task_struct *target, const struct user_regset *regset); +int vsr_get(struct task_struct *target, const struct user_regset 
*regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int vsr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/* ptrace */ + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void flush_tmregs_to_thread(struct task_struct *tsk); +#else +static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } +#endif diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c new file mode 100644 index 000000000000..b2dc4e92d11a --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "ptrace-decl.h" + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + */ +int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_state, 0, -1); +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. 
+ * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + */ +int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != + offsetof(struct thread_fp_state, fpr[32])); + + flush_fp_to_thread(target); + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_state, 0, -1); +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c new file mode 100644 index 000000000000..d53466d49cc0 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "ptrace-decl.h" + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + */ +int fpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[33]; + int i; + + flush_fp_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. 
+ * + * Userspace interface buffer layout: + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + * }; + * + */ +int fpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[33]; + int i; + + flush_fp_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + + for (i = 0; i < 32 ; i++) + target->thread.TS_FPR(i) = buf[i]; + target->thread.fp_state.fpscr = buf[32]; + return 0; +} + +/* + * Currently to set and get all the vsx state, you need to call + * the fp and VMX calls as well. This only gets/sets the lower 32 + * 128bit VSX registers. + */ + +int vsr_active(struct task_struct *target, const struct user_regset *regset) +{ + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. 
+ * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ +int vsr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/* + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. + * + * Userspace interface buffer layout: + * + * struct data { + * u64 vsx[32]; + * }; + */ +int vsr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 22826c942eae..ead33b74e1f3 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -33,6 +33,8 @@ #define CREATE_TRACE_POINTS #include +#include "ptrace-decl.h" + struct pt_regs_offset { const char *name; int offset; @@ -100,7 +102,7 @@ static const struct pt_regs_offset regoffset_table[] = { }; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 
-static void flush_tmregs_to_thread(struct task_struct *tsk) +void flush_tmregs_to_thread(struct task_struct *tsk) { /* * If task is not current, it will have been flushed already to @@ -120,8 +122,6 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) tm_save_sprs(&(tsk->thread)); } } -#else -static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } #endif /** @@ -403,91 +403,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return ret; } -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. - * - * Userspace interface buffer layout: - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - * }; - */ -static int fpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ -#ifdef CONFIG_VSX - u64 buf[33]; - int i; - - flush_fp_to_thread(target); - - /* copy to local buffer then write that out */ - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -#else - BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32])); - - flush_fp_to_thread(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); -#endif -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. 
- * - * Userspace interface buffer layout: - * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - * }; - * - */ -static int fpr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ -#ifdef CONFIG_VSX - u64 buf[33]; - int i; - - flush_fp_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; - - /* copy to local buffer then write that out */ - i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - if (i) - return i; - - for (i = 0; i < 32 ; i++) - target->thread.TS_FPR(i) = buf[i]; - target->thread.fp_state.fpscr = buf[32]; - return 0; -#else - BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != - offsetof(struct thread_fp_state, fpr[32])); - - flush_fp_to_thread(target); - - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_state, 0, -1); -#endif -} - #ifdef CONFIG_ALTIVEC /* * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. @@ -612,90 +527,6 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX -/* - * Currently to set and and get all the vsx state, you need to call - * the fp and VMX calls as well. This only get/sets the lower 32 - * 128bit VSX registers. - */ - -static int vsr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_vsx_to_thread(target); - return target->thread.used_vsr ? regset->n : 0; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last - * checkpointed value of all FPR registers for the current - * transaction. 
- * - * Userspace interface buffer layout: - * - * struct data { - * u64 vsx[32]; - * }; - */ -static int vsr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; -} - -/* - * Regardless of transactions, 'fp_state' holds the current running - * value of all FPR registers and 'ckfp_state' holds the last - * checkpointed value of all FPR registers for the current - * transaction. - * - * Userspace interface buffer layout: - * - * struct data { - * u64 vsx[32]; - * }; - */ -static int vsr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[32]; - int ret,i; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - if (!ret) - for (i = 0; i < 32 ; i++) - target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - - return ret; -} -#endif /* CONFIG_VSX */ - #ifdef CONFIG_SPE /* -- cgit v1.2.3-59-g8ed1b From 1b20773b00b71e361a6072d4b0ea6299e10678c9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:42 +0000 Subject: powerpc/ptrace: split out ALTIVEC related functions. 
Move CONFIG_ALTIVEC functions out of ptrace.c, into ptrace-altivec.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/35dae891d01c817fca0fd6ab406a3a2c7bf07f60.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/Makefile | 1 + arch/powerpc/kernel/ptrace/ptrace-altivec.c | 128 ++++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace-decl.h | 9 ++ arch/powerpc/kernel/ptrace/ptrace.c | 124 --------------------------- 4 files changed, 138 insertions(+), 124 deletions(-) create mode 100644 arch/powerpc/kernel/ptrace/ptrace-altivec.c diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 238c27189078..522e6fd0b5b8 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) obj-y += ptrace-novsx.o endif +obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c new file mode 100644 index 000000000000..dd8b75dfbd06 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include + +#include "ptrace-decl.h" + +/* + * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. + * The transfer totals 34 quadword. Quadwords 0-31 contain the + * corresponding vector registers. Quadword 32 contains the vscr as the + * last word (offset 12) within that quadword. Quadword 33 contains the + * vrsave as the first word (offset 0) within the quadword. + * + * This definition of the VMX state is compatible with the current PPC32 + * ptrace interface. This allows signal handling and ptrace to use the + * same structures. This also simplifies the implementation of a bi-arch + * (combined (32- and 64-bit) gdb. 
+ */ + +int vr_active(struct task_struct *target, const struct user_regset *regset) +{ + flush_altivec_to_thread(target); + return target->thread.used_vr ? regset->n : 0; +} + +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. + * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ +int vr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + int start, end; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + + vrsave.word = target->thread.vrsave; + + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + start, end); + } + + return ret; +} + +/* + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. 
+ * + * Userspace interface buffer layout: + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + * }; + */ +int vr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_altivec_to_thread(target); + + BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != + offsetof(struct thread_vr_state, vr[32])); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.vr_state, 0, + 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the first word of vrsave. + */ + int start, end; + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + + vrsave.word = target->thread.vrsave; + + start = 33 * sizeof(vector128); + end = start + sizeof(vrsave); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + start, end); + if (!ret) + target->thread.vrsave = vrsave.word; + } + + return ret; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 764df4ee9362..0f9282cb52fc 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -17,6 +17,15 @@ int vsr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); +/* ptrace-altivec */ + +int vr_active(struct task_struct *target, const struct user_regset *regset); +int vr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int vr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + /* ptrace */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 
ead33b74e1f3..c383325db4a6 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -403,130 +403,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return ret; } -#ifdef CONFIG_ALTIVEC -/* - * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. - * The transfer totals 34 quadword. Quadwords 0-31 contain the - * corresponding vector registers. Quadword 32 contains the vscr as the - * last word (offset 12) within that quadword. Quadword 33 contains the - * vrsave as the first word (offset 0) within the quadword. - * - * This definition of the VMX state is compatible with the current PPC32 - * ptrace interface. This allows signal handling and ptrace to use the - * same structures. This also simplifies the implementation of a bi-arch - * (combined (32- and 64-bit) gdb. - */ - -static int vr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_altivec_to_thread(target); - return target->thread.used_vr ? regset->n : 0; -} - -/* - * Regardless of transactions, 'vr_state' holds the current running - * value of all the VMX registers and 'ckvr_state' holds the last - * checkpointed value of all the VMX registers for the current - * transaction to fall back on in case it aborts. - * - * Userspace interface buffer layout: - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - * }; - */ -static int vr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - flush_altivec_to_thread(target); - - BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != - offsetof(struct thread_vr_state, vr[32])); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. 
- */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - } - - return ret; -} - -/* - * Regardless of transactions, 'vr_state' holds the current running - * value of all the VMX registers and 'ckvr_state' holds the last - * checkpointed value of all the VMX registers for the current - * transaction to fall back on in case it aborts. - * - * Userspace interface buffer layout: - * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - * }; - */ -static int vr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - flush_altivec_to_thread(target); - - BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != - offsetof(struct thread_vr_state, vr[32])); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, - 33 * sizeof(vector128)); - if (!ret && count > 0) { - /* - * We use only the first word of vrsave. - */ - int start, end; - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - - vrsave.word = target->thread.vrsave; - - start = 33 * sizeof(vector128); - end = start + sizeof(vrsave); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, - start, end); - if (!ret) - target->thread.vrsave = vrsave.word; - } - - return ret; -} -#endif /* CONFIG_ALTIVEC */ - #ifdef CONFIG_SPE /* -- cgit v1.2.3-59-g8ed1b From 60ef9dbd9d2ada53f488f75ae7fef13bb4962636 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:43 +0000 Subject: powerpc/ptrace: split out SPE related functions. 
Move CONFIG_SPE functions out of ptrace.c, into ptrace-spe.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0f17a331760310b5562fae3791cdd3cf9c64237b.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/Makefile | 1 + arch/powerpc/kernel/ptrace/ptrace-decl.h | 9 +++++ arch/powerpc/kernel/ptrace/ptrace-spe.c | 68 ++++++++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 66 ------------------------------- 4 files changed, 78 insertions(+), 66 deletions(-) create mode 100644 arch/powerpc/kernel/ptrace/ptrace-spe.c diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 522e6fd0b5b8..f87eadf6e072 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -12,3 +12,4 @@ ifneq ($(CONFIG_VSX),y) obj-y += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o +obj-$(CONFIG_SPE) += ptrace-spe.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 0f9282cb52fc..8a362f97f1d6 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -26,6 +26,15 @@ int vr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); +/* ptrace-spe */ + +int evr_active(struct task_struct *target, const struct user_regset *regset); +int evr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int evr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + /* ptrace */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c new file mode 100644 index 000000000000..68b86b4a4be4 --- /dev/null +++ 
b/arch/powerpc/kernel/ptrace/ptrace-spe.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "ptrace-decl.h" + +/* + * For get_evrregs/set_evrregs functions 'data' has the following layout: + * + * struct { + * u32 evr[32]; + * u64 acc; + * u32 spefscr; + * } + */ + +int evr_active(struct task_struct *target, const struct user_regset *regset) +{ + flush_spe_to_thread(target); + return target->thread.used_spe ? regset->n : 0; +} + +int evr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} + +int evr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + flush_spe_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.evr, + 0, sizeof(target->thread.evr)); + + BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != + offsetof(struct thread_struct, spefscr)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.acc, + sizeof(target->thread.evr), -1); + + return ret; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c383325db4a6..ca2b4d804992 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -403,72 +403,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return ret; } -#ifdef CONFIG_SPE - -/* - * For 
get_evrregs/set_evrregs functions 'data' has the following layout: - * - * struct { - * u32 evr[32]; - * u64 acc; - * u32 spefscr; - * } - */ - -static int evr_active(struct task_struct *target, - const struct user_regset *regset) -{ - flush_spe_to_thread(target); - return target->thread.used_spe ? regset->n : 0; -} - -static int evr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - flush_spe_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); - - BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != - offsetof(struct thread_struct, spefscr)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; -} - -static int evr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - flush_spe_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.evr, - 0, sizeof(target->thread.evr)); - - BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) != - offsetof(struct thread_struct, spefscr)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.acc, - sizeof(target->thread.evr), -1); - - return ret; -} -#endif /* CONFIG_SPE */ - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /** * tm_cgpr_active - get active number of registers in CGPR -- cgit v1.2.3-59-g8ed1b From 7c1f8db019f82e5f81bf1a84fd8b064d5d01652a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:44 +0000 Subject: powerpc/ptrace: split out TRANSACTIONAL_MEM related functions. 
Move TRANSACTIONAL_MEM functions out of ptrace.c, into ptrace-tm.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2d0ef3bb2610c0344bd42252c7134f429818c000.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/Makefile | 1 + arch/powerpc/kernel/ptrace/ptrace-decl.h | 89 +++ arch/powerpc/kernel/ptrace/ptrace-tm.c | 851 ++++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 916 +------------------------------ 4 files changed, 943 insertions(+), 914 deletions(-) create mode 100644 arch/powerpc/kernel/ptrace/ptrace-tm.c diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index f87eadf6e072..2d7f5f301536 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -13,3 +13,4 @@ obj-y += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o +obj-$(CONFIG_PPC_TRANSACTIONAL_MEM) += ptrace-tm.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 8a362f97f1d6..8d076818f1de 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -1,5 +1,27 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Set of msr bits that gdb can change on behalf of a process. 
+ */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#define MSR_DEBUGCHANGE 0 +#else +#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) +#endif + +/* + * Max register writeable via put_reg + */ +#ifdef CONFIG_PPC32 +#define PT_MAX_PUT_REG PT_MQ +#else +#define PT_MAX_PUT_REG PT_CCR +#endif + +#define TVSO(f) (offsetof(struct thread_vr_state, f)) +#define TFSO(f) (offsetof(struct thread_fp_state, f)) +#define TSO(f) (offsetof(struct thread_struct, f)) + /* ptrace-(no)vsx */ int fpr_get(struct task_struct *target, const struct user_regset *regset, @@ -37,8 +59,75 @@ int evr_set(struct task_struct *target, const struct user_regset *regset, /* ptrace */ +int gpr32_get_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + unsigned long *regs); +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs); + +/* ptrace-tm */ + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void flush_tmregs_to_thread(struct task_struct *tsk); #else static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } #endif + +int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset); +int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset); +int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void 
__user *ubuf); +int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset); +int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset); +int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_spr_active(struct task_struct *target, const struct user_regset *regset); +int tm_spr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_spr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_tar_active(struct task_struct *target, const struct user_regset *regset); +int tm_tar_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_tar_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_ppr_active(struct task_struct *target, const struct user_regset *regset); +int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user 
*ubuf); +int tm_dscr_active(struct task_struct *target, const struct user_regset *regset); +int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf); +int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c new file mode 100644 index 000000000000..d75aff31f637 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include +#include +#include + +#include "ptrace-decl.h" + +void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * If task is not current, it will have been flushed already to + * it's thread_struct during __switch_to(). + * + * A reclaim flushes ALL the state or if not in TM save TM SPRs + * in the appropriate thread structures from live. 
+ */ + + if (!cpu_has_feature(CPU_FTR_TM) || tsk != current) + return; + + if (MSR_TM_SUSPENDED(mfmsr())) { + tm_reclaim_current(TM_CAUSE_SIGNAL); + } else { + tm_enable(); + tm_save_sprs(&tsk->thread); + } +} + +static unsigned long get_user_ckpt_msr(struct task_struct *task) +{ + return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; +} + +static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; + task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.ckpt_regs.trap = trap & 0xfff0; + return 0; +} + +/** + * tm_cgpr_active - get active number of registers in CGPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed GPR category. + */ +int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cgpr_get - get CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds all the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function gets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. 
+ * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_ckpt_msr(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct user_pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct user_pt_regs), -1); + + return ret; +} + +/* + * tm_cgpr_set - set the CGPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed GPR registers. + * + * When the transaction is active, 'ckpt_regs' holds the checkpointed + * GPR register values for the current transaction to fall back on if it + * aborts in between. This function sets those checkpointed GPR registers. + * The userspace interface buffer layout is as follows. 
+ * + * struct data { + * struct pt_regs ckpt_regs; + * }; + */ +int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ckpt_regs.orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_TRAP * sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_ckpt_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +/** + * tm_cfpr_active - get active number of registers in CFPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed FPR category. 
+ */ +int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cfpr_get - get CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed FPR registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed FPR registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* copy to local buffer then write that out */ + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); +} + +/** + * tm_cfpr_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed FPR registers. 
+ * + * When the transaction is active 'ckfp_state' holds the checkpointed + * FPR register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. + * + * struct data { + * u64 fpr[32]; + * u64 fpscr; + *}; + */ +int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[33]; + int i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + for (i = 0; i < 32; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + + /* copy to local buffer then write that out */ + i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); + if (i) + return i; + for (i = 0; i < 32 ; i++) + target->thread.TS_CKFPR(i) = buf[i]; + target->thread.ckfp_state.fpscr = buf[32]; + return 0; +} + +/** + * tm_cvmx_active - get active number of registers in CVMX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in checkpointed VMX category. + */ +int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + return regset->n; +} + +/** + * tm_cvmx_get - get CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VMX registers. 
+ * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. + * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state, + 0, 33 * sizeof(vector128)); + if (!ret) { + /* + * Copy out only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + } + + return ret; +} + +/** + * tm_cvmx_set - set CMVX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VMX registers. + * + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold + * the checkpointed values for the current transaction to fall + * back on if it aborts in between. The userspace interface buffer + * layout is as follows. 
+ * + * struct data { + * vector128 vr[32]; + * vector128 vscr; + * vector128 vrsave; + *}; + */ +int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ckvr_state, + 0, 33 * sizeof(vector128)); + if (!ret && count > 0) { + /* + * We use only the low-order word of vrsave. + */ + union { + elf_vrreg_t reg; + u32 word; + } vrsave; + memset(&vrsave, 0, sizeof(vrsave)); + vrsave.word = target->thread.ckvrsave; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, + 33 * sizeof(vector128), -1); + if (!ret) + target->thread.ckvrsave = vrsave.word; + } + + return ret; +} + +/** + * tm_cvsx_active - get active number of registers in CVSX + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks for the active number of available + * regisers in transaction checkpointed VSX category. + */ +int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return 0; + + flush_vsx_to_thread(target); + return target->thread.used_vsr ? regset->n : 0; +} + +/** + * tm_cvsx_get - get CVSX registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets in transaction checkpointed VSX registers. 
+ * + * When the transaction is active 'ckfp_state' holds the checkpointed + * values for the current transaction to fall back on if it aborts + * in between. This function gets those checkpointed VSX registers. + * The userspace interface buffer layout is as follows. + * + * struct data { + * u64 vsx[32]; + *}; + */ +int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + + return ret; +} + +/** + * tm_cvsx_set - set CFPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets in transaction checkpointed VSX registers. + * + * When the transaction is active 'ckfp_state' holds the checkpointed + * VSX register values for the current transaction to fall back on + * if it aborts in between. This function sets these checkpointed + * FPR registers. The userspace interface buffer layout is as follows. 
+ * + * struct data { + * u64 vsx[32]; + *}; + */ +int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 buf[32]; + int ret, i; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + /* Flush the state */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + flush_vsx_to_thread(target); + + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + buf, 0, 32 * sizeof(double)); + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + + return ret; +} + +/** + * tm_spr_active - get active number of registers in TM SPR + * @target: The target task. + * @regset: The user regset structure. + * + * This function checks the active number of available + * regisers in the transactional memory SPR category. + */ +int tm_spr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + return regset->n; +} + +/** + * tm_spr_get - get the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy from. + * @ubuf: User buffer to copy into. + * + * This function gets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. 
+ * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +int tm_spr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +/** + * tm_spr_set - set the TM related SPR registers + * @target: The target task. + * @regset: The user regset structure. + * @pos: The buffer position. + * @count: Number of bytes to copy. + * @kbuf: Kernel buffer to copy into. + * @ubuf: User buffer to copy from. + * + * This function sets transactional memory related SPR registers. + * The userspace interface buffer layout is as follows. 
+ * + * struct { + * u64 tm_tfhar; + * u64 tm_texasr; + * u64 tm_tfiar; + * }; + */ +int tm_spr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* Build tests */ + BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); + BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); + BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + /* Flush the states */ + flush_tmregs_to_thread(target); + flush_fp_to_thread(target); + flush_altivec_to_thread(target); + + /* TFHAR register */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfhar, 0, sizeof(u64)); + + /* TEXASR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_texasr, sizeof(u64), + 2 * sizeof(u64)); + + /* TFIAR register */ + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tfiar, + 2 * sizeof(u64), 3 * sizeof(u64)); + return ret; +} + +int tm_tar_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +int tm_tar_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +int tm_tar_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if 
(!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_tar, 0, sizeof(u64)); + return ret; +} + +int tm_ppr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + + +int tm_ppr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +int tm_ppr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_ppr, 0, sizeof(u64)); + return ret; +} + +int tm_dscr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (MSR_TM_ACTIVE(target->thread.regs->msr)) + return regset->n; + + return 0; +} + +int tm_dscr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} + +int tm_dscr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + 
const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_TM)) + return -ENODEV; + + if (!MSR_TM_ACTIVE(target->thread.regs->msr)) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tm_dscr, 0, sizeof(u64)); + return ret; +} + +int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} + +int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); +} diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index ca2b4d804992..2ed032f00a19 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -26,7 +26,6 @@ #include #include -#include #include #include @@ -47,10 +46,6 @@ struct pt_regs_offset { {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} #define REG_OFFSET_END {.name = NULL, .offset = 0} -#define TVSO(f) (offsetof(struct thread_vr_state, f)) -#define TFSO(f) (offsetof(struct thread_fp_state, f)) -#define TSO(f) (offsetof(struct thread_struct, f)) - static const struct pt_regs_offset regoffset_table[] = { GPR_OFFSET_NAME(0), GPR_OFFSET_NAME(1), @@ -101,29 +96,6 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_END, }; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void flush_tmregs_to_thread(struct task_struct *tsk) -{ - /* - * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). - * - * A reclaim flushes ALL the state or if not in TM save TM SPRs - * in the appropriate thread structures from live. 
- */ - - if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current)) - return; - - if (MSR_TM_SUSPENDED(mfmsr())) { - tm_reclaim_current(TM_CAUSE_SIGNAL); - } else { - tm_enable(); - tm_save_sprs(&(tsk->thread)); - } -} -#endif - /** * regs_query_register_offset() - query register offset from its name * @name: the name of a register @@ -161,24 +133,6 @@ const char *regs_query_register_name(unsigned int offset) * in exit.c or in signal.c. */ -/* - * Set of msr bits that gdb can change on behalf of a process. - */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -#define MSR_DEBUGCHANGE 0 -#else -#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) -#endif - -/* - * Max register writeable via put_reg - */ -#ifdef CONFIG_PPC32 -#define PT_MAX_PUT_REG PT_MQ -#else -#define PT_MAX_PUT_REG PT_CCR -#endif - static unsigned long get_user_msr(struct task_struct *task) { return task->thread.regs->msr | task->thread.fpexc_mode; @@ -191,26 +145,6 @@ static int set_user_msr(struct task_struct *task, unsigned long msr) return 0; } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static unsigned long get_user_ckpt_msr(struct task_struct *task) -{ - return task->thread.ckpt_regs.msr | task->thread.fpexc_mode; -} - -static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr) -{ - task->thread.ckpt_regs.msr &= ~MSR_DEBUGCHANGE; - task->thread.ckpt_regs.msr |= msr & MSR_DEBUGCHANGE; - return 0; -} - -static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap) -{ - task->thread.ckpt_regs.trap = trap & 0xfff0; - return 0; -} -#endif - #ifdef CONFIG_PPC64 static int get_user_dscr(struct task_struct *task, unsigned long *data) { @@ -403,832 +337,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return ret; } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/** - * tm_cgpr_active - get active number of registers in CGPR - * @target: The target task. - * @regset: The user regset structure. 
- * - * This function checks for the active number of available - * regisers in transaction checkpointed GPR category. - */ -static int tm_cgpr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cgpr_get - get CGPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets transaction checkpointed GPR registers. - * - * When the transaction is active, 'ckpt_regs' holds all the checkpointed - * GPR register values for the current transaction to fall back on if it - * aborts in between. This function gets those checkpointed GPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * struct pt_regs ckpt_regs; - * }; - */ -static int tm_cgpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_ckpt_msr(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - 
offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; -} - -/* - * tm_cgpr_set - set the CGPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed GPR registers. - * - * When the transaction is active, 'ckpt_regs' holds the checkpointed - * GPR register values for the current transaction to fall back on if it - * aborts in between. This function sets those checkpointed GPR registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * struct pt_regs ckpt_regs; - * }; - */ -static int tm_cgpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - unsigned long reg; - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs, - 0, PT_MSR * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, - PT_MSR * sizeof(reg), - (PT_MSR + 1) * sizeof(reg)); - if (!ret) - ret = set_user_ckpt_msr(target, reg); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckpt_regs.orig_gpr3, - PT_ORIG_R3 * sizeof(reg), - (PT_MAX_PUT_REG + 1) * sizeof(reg)); - - if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore( - &pos, &count, 
&kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, - PT_TRAP * sizeof(reg), - (PT_TRAP + 1) * sizeof(reg)); - if (!ret) - ret = set_user_ckpt_trap(target, reg); - } - - if (!ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); - - return ret; -} - -/** - * tm_cfpr_active - get active number of registers in CFPR - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed FPR category. - */ -static int tm_cfpr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cfpr_get - get CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed FPR registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * values for the current transaction to fall back on if it aborts - * in between. This function gets those checkpointed FPR registers. - * The userspace interface buffer layout is as follows. 
- * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - *}; - */ -static int tm_cfpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[33]; - int i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* copy to local buffer then write that out */ - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_CKFPR(i); - buf[32] = target->thread.ckfp_state.fpscr; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -} - -/** - * tm_cfpr_set - set CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed FPR registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * FPR register values for the current transaction to fall back on - * if it aborts in between. This function sets these checkpointed - * FPR registers. The userspace interface buffer layout is as follows. 
- * - * struct data { - * u64 fpr[32]; - * u64 fpscr; - *}; - */ -static int tm_cfpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[33]; - int i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - for (i = 0; i < 32; i++) - buf[i] = target->thread.TS_CKFPR(i); - buf[32] = target->thread.ckfp_state.fpscr; - - /* copy to local buffer then write that out */ - i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - if (i) - return i; - for (i = 0; i < 32 ; i++) - target->thread.TS_CKFPR(i) = buf[i]; - target->thread.ckfp_state.fpscr = buf[32]; - return 0; -} - -/** - * tm_cvmx_active - get active number of registers in CVMX - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in checkpointed VMX category. - */ -static int tm_cvmx_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - return regset->n; -} - -/** - * tm_cvmx_get - get CMVX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed VMX registers. - * - * When the transaction is active 'ckvr_state' and 'ckvrsave' hold - * the checkpointed values for the current transaction to fall - * back on if it aborts in between. The userspace interface buffer - * layout is as follows. 
- * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - *}; - */ -static int tm_cvmx_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ckvr_state, 0, - 33 * sizeof(vector128)); - if (!ret) { - /* - * Copy out only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - } - - return ret; -} - -/** - * tm_cvmx_set - set CMVX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed VMX registers. - * - * When the transaction is active 'ckvr_state' and 'ckvrsave' hold - * the checkpointed values for the current transaction to fall - * back on if it aborts in between. The userspace interface buffer - * layout is as follows. 
- * - * struct data { - * vector128 vr[32]; - * vector128 vscr; - * vector128 vrsave; - *}; - */ -static int tm_cvmx_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32])); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ckvr_state, 0, - 33 * sizeof(vector128)); - if (!ret && count > 0) { - /* - * We use only the low-order word of vrsave. - */ - union { - elf_vrreg_t reg; - u32 word; - } vrsave; - memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.ckvrsave; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, - 33 * sizeof(vector128), -1); - if (!ret) - target->thread.ckvrsave = vrsave.word; - } - - return ret; -} - -/** - * tm_cvsx_active - get active number of registers in CVSX - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks for the active number of available - * regisers in transaction checkpointed VSX category. - */ -static int tm_cvsx_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return 0; - - flush_vsx_to_thread(target); - return target->thread.used_vsr ? regset->n : 0; -} - -/** - * tm_cvsx_get - get CVSX registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets in transaction checkpointed VSX registers. 
- * - * When the transaction is active 'ckfp_state' holds the checkpointed - * values for the current transaction to fall back on if it aborts - * in between. This function gets those checkpointed VSX registers. - * The userspace interface buffer layout is as follows. - * - * struct data { - * u64 vsx[32]; - *}; - */ -static int tm_cvsx_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - - return ret; -} - -/** - * tm_cvsx_set - set CFPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets in transaction checkpointed VSX registers. - * - * When the transaction is active 'ckfp_state' holds the checkpointed - * VSX register values for the current transaction to fall back on - * if it aborts in between. This function sets these checkpointed - * FPR registers. The userspace interface buffer layout is as follows. 
- * - * struct data { - * u64 vsx[32]; - *}; - */ -static int tm_cvsx_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 buf[32]; - int ret, i; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - /* Flush the state */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - flush_vsx_to_thread(target); - - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - buf, 0, 32 * sizeof(double)); - if (!ret) - for (i = 0; i < 32 ; i++) - target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - - return ret; -} - -/** - * tm_spr_active - get active number of registers in TM SPR - * @target: The target task. - * @regset: The user regset structure. - * - * This function checks the active number of available - * regisers in the transactional memory SPR category. - */ -static int tm_spr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - return regset->n; -} - -/** - * tm_spr_get - get the TM related SPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy from. - * @ubuf: User buffer to copy into. - * - * This function gets transactional memory related SPR registers. - * The userspace interface buffer layout is as follows. 
- * - * struct { - * u64 tm_tfhar; - * u64 tm_texasr; - * u64 tm_tfiar; - * }; - */ -static int tm_spr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - /* Build tests */ - BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); - BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); - BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - /* Flush the states */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* TFHAR register */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - - /* TEXASR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - - /* TFIAR register */ - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; -} - -/** - * tm_spr_set - set the TM related SPR registers - * @target: The target task. - * @regset: The user regset structure. - * @pos: The buffer position. - * @count: Number of bytes to copy. - * @kbuf: Kernel buffer to copy into. - * @ubuf: User buffer to copy from. - * - * This function sets transactional memory related SPR registers. - * The userspace interface buffer layout is as follows. 
- * - * struct { - * u64 tm_tfhar; - * u64 tm_texasr; - * u64 tm_tfiar; - * }; - */ -static int tm_spr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - /* Build tests */ - BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr)); - BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar)); - BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs)); - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - /* Flush the states */ - flush_tmregs_to_thread(target); - flush_fp_to_thread(target); - flush_altivec_to_thread(target); - - /* TFHAR register */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfhar, 0, sizeof(u64)); - - /* TEXASR register */ - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_texasr, sizeof(u64), - 2 * sizeof(u64)); - - /* TFIAR register */ - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tfiar, - 2 * sizeof(u64), 3 * sizeof(u64)); - return ret; -} - -static int tm_tar_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - -static int tm_tar_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; -} - -static int tm_tar_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - 
return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_tar, 0, sizeof(u64)); - return ret; -} - -static int tm_ppr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - - -static int tm_ppr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; -} - -static int tm_ppr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_ppr, 0, sizeof(u64)); - return ret; -} - -static int tm_dscr_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - return regset->n; - - return 0; -} - -static int tm_dscr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; -} - -static int tm_dscr_set(struct task_struct *target, 
- const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_TM)) - return -ENODEV; - - if (!MSR_TM_ACTIVE(target->thread.regs->msr)) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tm_dscr, 0, sizeof(u64)); - return ret; -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - #ifdef CONFIG_PPC64 static int ppr_get(struct task_struct *target, const struct user_regset *regset, @@ -1629,7 +737,7 @@ static const struct user_regset_view user_ppc_native_view = { #include -static int gpr32_get_common(struct task_struct *target, +int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, @@ -1676,7 +784,7 @@ static int gpr32_get_common(struct task_struct *target, PT_REGS_COUNT * sizeof(reg), -1); } -static int gpr32_set_common(struct task_struct *target, +int gpr32_set_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf, @@ -1744,26 +852,6 @@ static int gpr32_set_common(struct task_struct *target, (PT_TRAP + 1) * sizeof(reg), -1); } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -static int tm_cgpr32_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.ckpt_regs.gpr[0]); -} - -static int tm_cgpr32_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.ckpt_regs.gpr[0]); -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - static int gpr32_get(struct task_struct *target, const struct user_regset *regset, 
unsigned int pos, unsigned int count, -- cgit v1.2.3-59-g8ed1b From 6e0b79750ce2f3b9c9eabbb5687f343483abdc64 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:45 +0000 Subject: powerpc/ptrace: move register viewing functions out of ptrace.c Create a dedicated ptrace-view.c file. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bfd8c3ed57c9057e4a5d3816737b5ee98c6f7e43.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/Makefile | 4 +- arch/powerpc/kernel/ptrace/ptrace-decl.h | 43 ++ arch/powerpc/kernel/ptrace/ptrace-view.c | 904 +++++++++++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 966 ------------------------------- 4 files changed, 949 insertions(+), 968 deletions(-) create mode 100644 arch/powerpc/kernel/ptrace/ptrace-view.c diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 2d7f5f301536..7addc5994bb9 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -3,9 +3,9 @@ # Makefile for the linux kernel. # -CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' +CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -obj-y += ptrace.o +obj-y += ptrace.o ptrace-view.o obj-$(CONFIG_PPC64) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 8d076818f1de..e12f6615fc1d 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -22,6 +22,45 @@ #define TFSO(f) (offsetof(struct thread_fp_state, f)) #define TSO(f) (offsetof(struct thread_struct, f)) +/* + * These are our native regset flavors. 
+ */ +enum powerpc_regset { + REGSET_GPR, + REGSET_FPR, +#ifdef CONFIG_ALTIVEC + REGSET_VMX, +#endif +#ifdef CONFIG_VSX + REGSET_VSX, +#endif +#ifdef CONFIG_SPE + REGSET_SPE, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + REGSET_TM_CGPR, /* TM checkpointed GPR registers */ + REGSET_TM_CFPR, /* TM checkpointed FPR registers */ + REGSET_TM_CVMX, /* TM checkpointed VMX registers */ + REGSET_TM_CVSX, /* TM checkpointed VSX registers */ + REGSET_TM_SPR, /* TM specific SPR registers */ + REGSET_TM_CTAR, /* TM checkpointed TAR register */ + REGSET_TM_CPPR, /* TM checkpointed PPR register */ + REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ +#endif +#ifdef CONFIG_PPC64 + REGSET_PPR, /* PPR register */ + REGSET_DSCR, /* DSCR register */ +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + REGSET_TAR, /* TAR register */ + REGSET_EBB, /* EBB registers */ + REGSET_PMR, /* Performance Monitor Registers */ +#endif +#ifdef CONFIG_PPC_MEM_KEYS + REGSET_PKEY, /* AMR register */ +#endif +}; + /* ptrace-(no)vsx */ int fpr_get(struct task_struct *target, const struct user_regset *regset, @@ -131,3 +170,7 @@ int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset, int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); + +/* ptrace-view */ + +extern const struct user_regset_view user_ppc_native_view; diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c new file mode 100644 index 000000000000..15e3b79b6395 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -0,0 +1,904 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/regset.h> +#include <linux/elf.h> +#include <linux/nospec.h> +#include <linux/pkeys.h> + +#include "ptrace-decl.h" + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define STR(s) #s /* convert to string */ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define GPR_OFFSET_NAME(num) \ + {.name = 
STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ + {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + GPR_OFFSET_NAME(31), + REG_OFFSET_NAME(nip), + REG_OFFSET_NAME(msr), + REG_OFFSET_NAME(ctr), + REG_OFFSET_NAME(link), + REG_OFFSET_NAME(xer), + REG_OFFSET_NAME(ccr), +#ifdef CONFIG_PPC64 + REG_OFFSET_NAME(softe), +#else + REG_OFFSET_NAME(mq), +#endif + REG_OFFSET_NAME(trap), + REG_OFFSET_NAME(dar), + REG_OFFSET_NAME(dsisr), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. 
+ * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +static unsigned long get_user_msr(struct task_struct *task) +{ + return task->thread.regs->msr | task->thread.fpexc_mode; +} + +static int set_user_msr(struct task_struct *task, unsigned long msr) +{ + task->thread.regs->msr &= ~MSR_DEBUGCHANGE; + task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; + return 0; +} + +#ifdef CONFIG_PPC64 +static int get_user_dscr(struct task_struct *task, unsigned long *data) +{ + *data = task->thread.dscr; + return 0; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + task->thread.dscr = dscr; + task->thread.dscr_inherit = 1; + return 0; +} +#else +static int get_user_dscr(struct task_struct *task, unsigned long *data) +{ + return -EIO; +} + +static int set_user_dscr(struct task_struct *task, unsigned long dscr) +{ + return -EIO; +} +#endif + +/* + * We prevent mucking around with the reserved area of trap + * which are used internally by the kernel. + */ +static int set_user_trap(struct task_struct *task, unsigned long trap) +{ + task->thread.regs->trap = trap & 0xfff0; + return 0; +} + +/* + * Get contents of register REGNO in task TASK. + */ +int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) +{ + unsigned int regs_max; + + if (task->thread.regs == NULL || !data) + return -EIO; + + if (regno == PT_MSR) { + *data = get_user_msr(task); + return 0; + } + + if (regno == PT_DSCR) + return get_user_dscr(task, data); + + /* + * softe copies paca->irq_soft_mask variable state. 
Since irq_soft_mask is + * no more used as a flag, lets force usr to alway see the softe value as 1 + * which means interrupts are not soft disabled. + */ + if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) { + *data = 1; + return 0; + } + + regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long); + if (regno < regs_max) { + regno = array_index_nospec(regno, regs_max); + *data = ((unsigned long *)task->thread.regs)[regno]; + return 0; + } + + return -EIO; +} + +/* + * Write contents of register REGNO in task TASK. + */ +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) +{ + if (task->thread.regs == NULL) + return -EIO; + + if (regno == PT_MSR) + return set_user_msr(task, data); + if (regno == PT_TRAP) + return set_user_trap(task, data); + if (regno == PT_DSCR) + return set_user_dscr(task, data); + + if (regno <= PT_MAX_PUT_REG) { + regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1); + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; +} + +static int gpr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + int i, ret; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. 
Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, offsetof(struct pt_regs, msr)); + if (!ret) { + unsigned long msr = get_user_msr(target); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, + offsetof(struct pt_regs, msr), + offsetof(struct pt_regs, msr) + + sizeof(msr)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + offsetof(struct pt_regs, orig_gpr3), + sizeof(struct user_pt_regs)); + if (!ret) + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(struct user_pt_regs), -1); + + return ret; +} + +static int gpr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + unsigned long reg; + int ret; + + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.regs, + 0, PT_MSR * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_MSR * sizeof(reg), + (PT_MSR + 1) * sizeof(reg)); + if (!ret) + ret = set_user_msr(target, reg); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct pt_regs, msr) + sizeof(long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->orig_gpr3, + PT_ORIG_R3 * sizeof(reg), + (PT_MAX_PUT_REG + 1) * sizeof(reg)); + + if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); + + if (!ret && count > 0) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, + PT_TRAP * 
sizeof(reg), + (PT_TRAP + 1) * sizeof(reg)); + if (!ret) + ret = set_user_trap(target, reg); + } + + if (!ret) + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + + return ret; +} + +#ifdef CONFIG_PPC64 +static int ppr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->ppr, 0, sizeof(u64)); +} + +static int ppr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.regs->ppr, 0, sizeof(u64)); +} + +static int dscr_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); +} +static int dscr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.dscr, 0, sizeof(u64)); +} +#endif +#ifdef CONFIG_PPC_BOOK3S_64 +static int tar_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); +} +static int tar_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.tar, 0, sizeof(u64)); +} + +static int ebb_active(struct task_struct *target, const struct user_regset *regset) +{ + if 
(!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return regset->n; + + return 0; +} + +static int ebb_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (!target->thread.used_ebb) + return -ENODATA; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr, + 0, 3 * sizeof(unsigned long)); +} + +static int ebb_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); + BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + if (target->thread.used_ebb) + return -ENODATA; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ebbrr, + 0, sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.ebbhr, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.bescr, 2 * sizeof(unsigned long), + 3 * sizeof(unsigned long)); + + return ret; +} +static int pmu_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return regset->n; +} + +static int pmu_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + 
sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.siar, + 0, 5 * sizeof(unsigned long)); +} + +static int pmu_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + int ret = 0; + + /* Build tests */ + BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); + BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); + BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); + BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.siar, + 0, sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sdar, sizeof(unsigned long), + 2 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.sier, 2 * sizeof(unsigned long), + 3 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr2, 3 * sizeof(unsigned long), + 4 * sizeof(unsigned long)); + + if (!ret) + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.mmcr0, 4 * sizeof(unsigned long), + 5 * sizeof(unsigned long)); + return ret; +} +#endif + +#ifdef CONFIG_PPC_MEM_KEYS +static int pkey_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!arch_pkeys_enabled()) + return -ENODEV; + + return regset->n; +} + +static int pkey_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(TSO(amr) + 
sizeof(unsigned long) != TSO(iamr)); + BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); + + if (!arch_pkeys_enabled()) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.amr, + 0, ELF_NPKEY * sizeof(unsigned long)); +} + +static int pkey_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + u64 new_amr; + int ret; + + if (!arch_pkeys_enabled()) + return -ENODEV; + + /* Only the AMR can be set from userspace */ + if (pos != 0 || count != sizeof(new_amr)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_amr, 0, sizeof(new_amr)); + if (ret) + return ret; + + /* UAMOR determines which bits of the AMR can be set from userspace. */ + target->thread.amr = (new_amr & target->thread.uamor) | + (target->thread.amr & ~target->thread.uamor); + + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + +static const struct user_regset native_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .get = gpr_get, .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_VSX + [REGSET_VSX] = { + .core_note_type = NT_PPC_VSX, .n = 32, + .size = sizeof(double), .align = sizeof(double), + .active = vsr_active, .get = vsr_get, .set = vsr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .core_note_type = NT_PPC_SPE, .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] 
= { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = 
tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, + [REGSET_PMR] = { + .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, + .size = sizeof(u64), .align = sizeof(u64), + .active = pmu_active, .get = pmu_get, .set = pmu_set + }, +#endif +#ifdef CONFIG_PPC_MEM_KEYS + [REGSET_PKEY] = { + .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + .size = sizeof(u64), .align = sizeof(u64), + .active = pkey_active, .get = pkey_get, .set = pkey_set + }, +#endif +}; + +const struct user_regset_view user_ppc_native_view = { + .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +#include <linux/compat.h> + +int gpr32_get_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + unsigned long *regs) +{ + compat_ulong_t *k = kbuf; + compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_MSR; --count) + if (__put_user((compat_ulong_t)regs[pos++], u++)) + return -EFAULT; + + if (count > 0 && pos == PT_MSR) { + reg = get_user_msr(target); + if (kbuf) + *k++ = reg; + else if (__put_user(reg, u++)) + return -EFAULT; + ++pos; + --count; + } + + if (kbuf) + for (; count > 0 && pos < PT_REGS_COUNT; --count) + *k++ = regs[pos++]; + else + for (; count > 0 && pos < PT_REGS_COUNT; --count) + if (__put_user((compat_ulong_t)regs[pos++], u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + PT_REGS_COUNT * sizeof(reg), -1); +} + +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, +
const void *kbuf, const void __user *ubuf, + unsigned long *regs) +{ + const compat_ulong_t *k = kbuf; + const compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + else + for (; count > 0 && pos < PT_MSR; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + + + if (count > 0 && pos == PT_MSR) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_msr(target, reg); + ++pos; + --count; + } + + if (kbuf) { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + } else { + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs[pos++] = reg; + } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + if (__get_user(reg, u++)) + return -EFAULT; + } + + if (count > 0 && pos == PT_TRAP) { + if (kbuf) + reg = *k++; + else if (__get_user(reg, u++)) + return -EFAULT; + set_user_trap(target, reg); + ++pos; + --count; + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); +} + +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int i; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* + * We have a partial register set. + * Fill 14-31 with bogus values. 
+ */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} + +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); +} + +/* + * These are the regset flavors matching the CONFIG_PPC32 native set. + */ +static const struct user_regset compat_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), + .get = gpr32_get, .set = gpr32_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .get = fpr_get, .set = fpr_set + }, +#ifdef CONFIG_ALTIVEC + [REGSET_VMX] = { + .core_note_type = NT_PPC_VMX, .n = 34, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = vr_active, .get = vr_get, .set = vr_set + }, +#endif +#ifdef CONFIG_SPE + [REGSET_SPE] = { + .core_note_type = NT_PPC_SPE, .n = 35, + .size = sizeof(u32), .align = sizeof(u32), + .active = evr_active, .get = evr_get, .set = evr_set + }, +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + [REGSET_TM_CGPR] = { + .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .active = tm_cgpr_active, + .get = tm_cgpr32_get, .set = tm_cgpr32_set + }, + [REGSET_TM_CFPR] = { + .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set + }, + [REGSET_TM_CVMX] = { + .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + .size = sizeof(vector128), .align = sizeof(vector128), + .active = 
tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set + }, + [REGSET_TM_CVSX] = { + .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + .size = sizeof(double), .align = sizeof(double), + .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set + }, + [REGSET_TM_SPR] = { + .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set + }, + [REGSET_TM_CTAR] = { + .core_note_type = NT_PPC_TM_CTAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set + }, + [REGSET_TM_CPPR] = { + .core_note_type = NT_PPC_TM_CPPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set + }, + [REGSET_TM_CDSCR] = { + .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set + }, +#endif +#ifdef CONFIG_PPC64 + [REGSET_PPR] = { + .core_note_type = NT_PPC_PPR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = ppr_get, .set = ppr_set + }, + [REGSET_DSCR] = { + .core_note_type = NT_PPC_DSCR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = dscr_get, .set = dscr_set + }, +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + [REGSET_TAR] = { + .core_note_type = NT_PPC_TAR, .n = 1, + .size = sizeof(u64), .align = sizeof(u64), + .get = tar_get, .set = tar_set + }, + [REGSET_EBB] = { + .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + .size = sizeof(u64), .align = sizeof(u64), + .active = ebb_active, .get = ebb_get, .set = ebb_set + }, +#endif +}; + +static const struct user_regset_view user_ppc_compat_view = { + .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, + .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + if (IS_ENABLED(CONFIG_PPC64) && 
test_tsk_thread_flag(task, TIF_32BIT)) + return &user_ppc_compat_view; + return &user_ppc_native_view; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 2ed032f00a19..95e66dad32e0 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -17,13 +17,10 @@ #include #include -#include #include #include #include -#include #include -#include #include #include @@ -34,969 +31,6 @@ #include "ptrace-decl.h" -struct pt_regs_offset { - const char *name; - int offset; -}; - -#define STR(s) #s /* convert to string */ -#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} -#define GPR_OFFSET_NAME(num) \ - {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ - {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} -#define REG_OFFSET_END {.name = NULL, .offset = 0} - -static const struct pt_regs_offset regoffset_table[] = { - GPR_OFFSET_NAME(0), - GPR_OFFSET_NAME(1), - GPR_OFFSET_NAME(2), - GPR_OFFSET_NAME(3), - GPR_OFFSET_NAME(4), - GPR_OFFSET_NAME(5), - GPR_OFFSET_NAME(6), - GPR_OFFSET_NAME(7), - GPR_OFFSET_NAME(8), - GPR_OFFSET_NAME(9), - GPR_OFFSET_NAME(10), - GPR_OFFSET_NAME(11), - GPR_OFFSET_NAME(12), - GPR_OFFSET_NAME(13), - GPR_OFFSET_NAME(14), - GPR_OFFSET_NAME(15), - GPR_OFFSET_NAME(16), - GPR_OFFSET_NAME(17), - GPR_OFFSET_NAME(18), - GPR_OFFSET_NAME(19), - GPR_OFFSET_NAME(20), - GPR_OFFSET_NAME(21), - GPR_OFFSET_NAME(22), - GPR_OFFSET_NAME(23), - GPR_OFFSET_NAME(24), - GPR_OFFSET_NAME(25), - GPR_OFFSET_NAME(26), - GPR_OFFSET_NAME(27), - GPR_OFFSET_NAME(28), - GPR_OFFSET_NAME(29), - GPR_OFFSET_NAME(30), - GPR_OFFSET_NAME(31), - REG_OFFSET_NAME(nip), - REG_OFFSET_NAME(msr), - REG_OFFSET_NAME(ctr), - REG_OFFSET_NAME(link), - REG_OFFSET_NAME(xer), - REG_OFFSET_NAME(ccr), -#ifdef CONFIG_PPC64 - REG_OFFSET_NAME(softe), -#else - REG_OFFSET_NAME(mq), -#endif - REG_OFFSET_NAME(trap), - REG_OFFSET_NAME(dar), - REG_OFFSET_NAME(dsisr), - 
REG_OFFSET_END, -}; - -/** - * regs_query_register_offset() - query register offset from its name - * @name: the name of a register - * - * regs_query_register_offset() returns the offset of a register in struct - * pt_regs from its name. If the name is invalid, this returns -EINVAL; - */ -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; -} - -/** - * regs_query_register_name() - query register name from its offset - * @offset: the offset of a register in struct pt_regs. - * - * regs_query_register_name() returns the name of a register from its - * offset in struct pt_regs. If the @offset is invalid, this returns NULL; - */ -const char *regs_query_register_name(unsigned int offset) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (roff->offset == offset) - return roff->name; - return NULL; -} - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. 
- */ - -static unsigned long get_user_msr(struct task_struct *task) -{ - return task->thread.regs->msr | task->thread.fpexc_mode; -} - -static int set_user_msr(struct task_struct *task, unsigned long msr) -{ - task->thread.regs->msr &= ~MSR_DEBUGCHANGE; - task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; - return 0; -} - -#ifdef CONFIG_PPC64 -static int get_user_dscr(struct task_struct *task, unsigned long *data) -{ - *data = task->thread.dscr; - return 0; -} - -static int set_user_dscr(struct task_struct *task, unsigned long dscr) -{ - task->thread.dscr = dscr; - task->thread.dscr_inherit = 1; - return 0; -} -#else -static int get_user_dscr(struct task_struct *task, unsigned long *data) -{ - return -EIO; -} - -static int set_user_dscr(struct task_struct *task, unsigned long dscr) -{ - return -EIO; -} -#endif - -/* - * We prevent mucking around with the reserved area of trap - * which are used internally by the kernel. - */ -static int set_user_trap(struct task_struct *task, unsigned long trap) -{ - task->thread.regs->trap = trap & 0xfff0; - return 0; -} - -/* - * Get contents of register REGNO in task TASK. - */ -int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) -{ - unsigned int regs_max; - - if ((task->thread.regs == NULL) || !data) - return -EIO; - - if (regno == PT_MSR) { - *data = get_user_msr(task); - return 0; - } - - if (regno == PT_DSCR) - return get_user_dscr(task, data); - - /* - * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is - * no more used as a flag, lets force usr to alway see the softe value as 1 - * which means interrupts are not soft disabled. 
- */ - if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) { - *data = 1; - return 0; - } - - regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long); - if (regno < regs_max) { - regno = array_index_nospec(regno, regs_max); - *data = ((unsigned long *)task->thread.regs)[regno]; - return 0; - } - - return -EIO; -} - -/* - * Write contents of register REGNO in task TASK. - */ -int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) -{ - if (task->thread.regs == NULL) - return -EIO; - - if (regno == PT_MSR) - return set_user_msr(task, data); - if (regno == PT_TRAP) - return set_user_trap(task, data); - if (regno == PT_DSCR) - return set_user_dscr(task, data); - - if (regno <= PT_MAX_PUT_REG) { - regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1); - ((unsigned long *)task->thread.regs)[regno] = data; - return 0; - } - return -EIO; -} - -static int gpr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int i, ret; - - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. 
Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.regs, - 0, offsetof(struct pt_regs, msr)); - if (!ret) { - unsigned long msr = get_user_msr(target); - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, - offsetof(struct pt_regs, msr), - offsetof(struct pt_regs, msr) + - sizeof(msr)); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - offsetof(struct pt_regs, orig_gpr3), - sizeof(struct user_pt_regs)); - if (!ret) - ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct user_pt_regs), -1); - - return ret; -} - -static int gpr_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - unsigned long reg; - int ret; - - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.regs, - 0, PT_MSR * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, - PT_MSR * sizeof(reg), - (PT_MSR + 1) * sizeof(reg)); - if (!ret) - ret = set_user_msr(target, reg); - } - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->orig_gpr3, - PT_ORIG_R3 * sizeof(reg), - (PT_MAX_PUT_REG + 1) * sizeof(reg)); - - if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); - - if (!ret && count > 0) { - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg, - PT_TRAP *
sizeof(reg), - (PT_TRAP + 1) * sizeof(reg)); - if (!ret) - ret = set_user_trap(target, reg); - } - - if (!ret) - ret = user_regset_copyin_ignore( - &pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); - - return ret; -} - -#ifdef CONFIG_PPC64 -static int ppr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); -} - -static int ppr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.regs->ppr, 0, sizeof(u64)); -} - -static int dscr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); -} -static int dscr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.dscr, 0, sizeof(u64)); -} -#endif -#ifdef CONFIG_PPC_BOOK3S_64 -static int tar_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); -} -static int tar_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.tar, 0, sizeof(u64)); -} - -static int ebb_active(struct task_struct *target, - const struct user_regset *regset) -{ - 
if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (target->thread.used_ebb) - return regset->n; - - return 0; -} - -static int ebb_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - /* Build tests */ - BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); - BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (!target->thread.used_ebb) - return -ENODATA; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbrr, 0, 3 * sizeof(unsigned long)); -} - -static int ebb_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - - /* Build tests */ - BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr)); - BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - if (target->thread.used_ebb) - return -ENODATA; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbrr, 0, sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ebbhr, sizeof(unsigned long), - 2 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.bescr, - 2 * sizeof(unsigned long), 3 * sizeof(unsigned long)); - - return ret; -} -static int pmu_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return regset->n; -} - -static int pmu_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - /* Build tests */ - BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); - 
BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); - BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); - BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.siar, 0, - 5 * sizeof(unsigned long)); -} - -static int pmu_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret = 0; - - /* Build tests */ - BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar)); - BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier)); - BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2)); - BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0)); - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.siar, 0, - sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.sdar, sizeof(unsigned long), - 2 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.sier, 2 * sizeof(unsigned long), - 3 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.mmcr2, 3 * sizeof(unsigned long), - 4 * sizeof(unsigned long)); - - if (!ret) - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.mmcr0, 4 * sizeof(unsigned long), - 5 * sizeof(unsigned long)); - return ret; -} -#endif - -#ifdef CONFIG_PPC_MEM_KEYS -static int pkey_active(struct task_struct *target, - const struct user_regset *regset) -{ - if (!arch_pkeys_enabled()) - return -ENODEV; - - return regset->n; -} - -static int pkey_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - 
BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); - BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); - - if (!arch_pkeys_enabled()) - return -ENODEV; - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.amr, 0, - ELF_NPKEY * sizeof(unsigned long)); -} - -static int pkey_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - u64 new_amr; - int ret; - - if (!arch_pkeys_enabled()) - return -ENODEV; - - /* Only the AMR can be set from userspace */ - if (pos != 0 || count != sizeof(new_amr)) - return -EINVAL; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &new_amr, 0, sizeof(new_amr)); - if (ret) - return ret; - - /* UAMOR determines which bits of the AMR can be set from userspace. */ - target->thread.amr = (new_amr & target->thread.uamor) | - (target->thread.amr & ~target->thread.uamor); - - return 0; -} -#endif /* CONFIG_PPC_MEM_KEYS */ - -/* - * These are our native regset flavors. 
- */ -enum powerpc_regset { - REGSET_GPR, - REGSET_FPR, -#ifdef CONFIG_ALTIVEC - REGSET_VMX, -#endif -#ifdef CONFIG_VSX - REGSET_VSX, -#endif -#ifdef CONFIG_SPE - REGSET_SPE, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - REGSET_TM_CGPR, /* TM checkpointed GPR registers */ - REGSET_TM_CFPR, /* TM checkpointed FPR registers */ - REGSET_TM_CVMX, /* TM checkpointed VMX registers */ - REGSET_TM_CVSX, /* TM checkpointed VSX registers */ - REGSET_TM_SPR, /* TM specific SPR registers */ - REGSET_TM_CTAR, /* TM checkpointed TAR register */ - REGSET_TM_CPPR, /* TM checkpointed PPR register */ - REGSET_TM_CDSCR, /* TM checkpointed DSCR register */ -#endif -#ifdef CONFIG_PPC64 - REGSET_PPR, /* PPR register */ - REGSET_DSCR, /* DSCR register */ -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - REGSET_TAR, /* TAR register */ - REGSET_EBB, /* EBB registers */ - REGSET_PMR, /* Performance Monitor Registers */ -#endif -#ifdef CONFIG_PPC_MEM_KEYS - REGSET_PKEY, /* AMR register */ -#endif -}; - -static const struct user_regset native_regsets[] = { - [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .get = gpr_get, .set = gpr_set - }, - [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set - }, -#ifdef CONFIG_ALTIVEC - [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set - }, -#endif -#ifdef CONFIG_VSX - [REGSET_VSX] = { - .core_note_type = NT_PPC_VSX, .n = 32, - .size = sizeof(double), .align = sizeof(double), - .active = vsr_active, .get = vsr_get, .set = vsr_set - }, -#endif -#ifdef CONFIG_SPE - [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, - .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set - }, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - [REGSET_TM_CGPR] = { 
- .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, .get = tm_cgpr_get, .set = tm_cgpr_set - }, - [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set - }, - [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set - }, - [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set - }, - [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set - }, - [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set - }, - [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set - }, - [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set - }, -#endif -#ifdef CONFIG_PPC64 - [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set - }, - [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set - }, -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set 
- }, - [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, - .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set - }, - [REGSET_PMR] = { - .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, - .size = sizeof(u64), .align = sizeof(u64), - .active = pmu_active, .get = pmu_get, .set = pmu_set - }, -#endif -#ifdef CONFIG_PPC_MEM_KEYS - [REGSET_PKEY] = { - .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, - .size = sizeof(u64), .align = sizeof(u64), - .active = pkey_active, .get = pkey_get, .set = pkey_set - }, -#endif -}; - -static const struct user_regset_view user_ppc_native_view = { - .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, - .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) -}; - -#include <linux/compat.h> - -int gpr32_get_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - unsigned long *regs) -{ - compat_ulong_t *k = kbuf; - compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_MSR; --count) - if (__put_user((compat_ulong_t) regs[pos++], u++)) - return -EFAULT; - - if (count > 0 && pos == PT_MSR) { - reg = get_user_msr(target); - if (kbuf) - *k++ = reg; - else if (__put_user(reg, u++)) - return -EFAULT; - ++pos; - --count; - } - - if (kbuf) - for (; count > 0 && pos < PT_REGS_COUNT; --count) - *k++ = regs[pos++]; - else - for (; count > 0 && pos < PT_REGS_COUNT; --count) - if (__put_user((compat_ulong_t) regs[pos++], u++)) - return -EFAULT; - - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - PT_REGS_COUNT * sizeof(reg), -1); -} - -int gpr32_set_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, -
const void *kbuf, const void __user *ubuf, - unsigned long *regs) -{ - const compat_ulong_t *k = kbuf; - const compat_ulong_t __user *u = ubuf; - compat_ulong_t reg; - - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - regs[pos++] = *k++; - else - for (; count > 0 && pos < PT_MSR; --count) { - if (__get_user(reg, u++)) - return -EFAULT; - regs[pos++] = reg; - } - - - if (count > 0 && pos == PT_MSR) { - if (kbuf) - reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; - set_user_msr(target, reg); - ++pos; - --count; - } - - if (kbuf) { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) - regs[pos++] = *k++; - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - ++k; - } else { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - if (__get_user(reg, u++)) - return -EFAULT; - regs[pos++] = reg; - } - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - if (__get_user(reg, u++)) - return -EFAULT; - } - - if (count > 0 && pos == PT_TRAP) { - if (kbuf) - reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; - set_user_trap(target, reg); - ++pos; - --count; - } - - kbuf = k; - ubuf = u; - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); -} - -static int gpr32_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int i; - - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* - * We have a partial register set. - * Fill 14-31 with bogus values. 
- */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.regs->gpr[0]); -} - -static int gpr32_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, - &target->thread.regs->gpr[0]); -} - -/* - * These are the regset flavors matching the CONFIG_PPC32 native set. - */ -static const struct user_regset compat_regsets[] = { - [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, - .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = gpr32_get, .set = gpr32_set - }, - [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .get = fpr_get, .set = fpr_set - }, -#ifdef CONFIG_ALTIVEC - [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = vr_active, .get = vr_get, .set = vr_set - }, -#endif -#ifdef CONFIG_SPE - [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, - .size = sizeof(u32), .align = sizeof(u32), - .active = evr_active, .get = evr_get, .set = evr_set - }, -#endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, - .size = sizeof(long), .align = sizeof(long), - .active = tm_cgpr_active, - .get = tm_cgpr32_get, .set = tm_cgpr32_set - }, - [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cfpr_active, .get = tm_cfpr_get, .set = tm_cfpr_set - }, - [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, - .size = sizeof(vector128), .align = sizeof(vector128), - .active = 
tm_cvmx_active, .get = tm_cvmx_get, .set = tm_cvmx_set - }, - [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, - .size = sizeof(double), .align = sizeof(double), - .active = tm_cvsx_active, .get = tm_cvsx_get, .set = tm_cvsx_set - }, - [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_spr_active, .get = tm_spr_get, .set = tm_spr_set - }, - [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_tar_active, .get = tm_tar_get, .set = tm_tar_set - }, - [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_ppr_active, .get = tm_ppr_get, .set = tm_ppr_set - }, - [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .active = tm_dscr_active, .get = tm_dscr_get, .set = tm_dscr_set - }, -#endif -#ifdef CONFIG_PPC64 - [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = ppr_get, .set = ppr_set - }, - [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = dscr_get, .set = dscr_set - }, -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, - .size = sizeof(u64), .align = sizeof(u64), - .get = tar_get, .set = tar_set - }, - [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, - .size = sizeof(u64), .align = sizeof(u64), - .active = ebb_active, .get = ebb_get, .set = ebb_set - }, -#endif -}; - -static const struct user_regset_view user_ppc_compat_view = { - .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI, - .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) -}; - -const struct user_regset_view *task_user_regset_view(struct task_struct *task) -{ - if (IS_ENABLED(CONFIG_PPC64) && 
test_tsk_thread_flag(task, TIF_32BIT)) - return &user_ppc_compat_view; - return &user_ppc_native_view; -} - - void user_enable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; -- cgit v1.2.3-59-g8ed1b From 323a780ca1fccbf467cdace6c0de25459552083f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:46 +0000 Subject: powerpc/ptrace: split out ADV_DEBUG_REGS related functions. Move ADV_DEBUG_REGS functions out of ptrace.c, into ptrace-adv.c and ptrace-noadv.c Signed-off-by: Christophe Leroy [mpe: Squash in fixup patch from Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e2bd7d275bd5933d848aad4fee3ca652a14d039b.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/Makefile | 4 + arch/powerpc/kernel/ptrace/ptrace-adv.c | 468 +++++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace-decl.h | 5 + arch/powerpc/kernel/ptrace/ptrace-noadv.c | 232 +++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 650 ------------------------------ 5 files changed, 709 insertions(+), 650 deletions(-) create mode 100644 arch/powerpc/kernel/ptrace/ptrace-adv.c create mode 100644 arch/powerpc/kernel/ptrace/ptrace-noadv.c diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 7addc5994bb9..e9d97c2d063e 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -14,3 +14,7 @@ endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o obj-$(CONFIG_PPC_TRANSACTIONAL_MEM) += ptrace-tm.o +obj-$(CONFIG_PPC_ADV_DEBUG_REGS) += ptrace-adv.o +ifneq ($(CONFIG_PPC_ADV_DEBUG_REGS),y) +obj-y += ptrace-noadv.o +endif diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c new file mode 100644 index 000000000000..eebcd41edc3d --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c @@ -0,0 +1,468 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include 
"ptrace-decl.h" + +void user_enable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + task->thread.debug.dbcr0 &= ~DBCR0_BT; + task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; + regs->msr |= MSR_DE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + task->thread.debug.dbcr0 &= ~DBCR0_IC; + task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; + regs->msr |= MSR_DE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_disable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + /* + * The logic to disable single stepping should be as + * simple as turning off the Instruction Complete flag. + * And, after doing so, if all debug flags are off, turn + * off DBCR0(IDM) and MSR(DE) .... Torez + */ + task->thread.debug.dbcr0 &= ~(DBCR0_IC | DBCR0_BT); + /* + * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. + */ + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { + /* + * All debug events were off..... + */ + task->thread.debug.dbcr0 &= ~DBCR0_IDM; + regs->msr &= ~MSR_DE; + } + } + clear_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret; + struct thread_struct *thread = &task->thread; + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. 
+ */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits in dabr are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + + /* As described above, it was assumed 3 bits were passed with the data + * address, but we will assume only the mode bits will be passed + * as to not cause alignment restrictions for DAC-based processors. + */ + + /* DAC's hold the whole address without any mode flags */ + task->thread.debug.dac1 = data & ~0x3UL; + + if (task->thread.debug.dac1 == 0) { + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, + task->thread.debug.dbcr1)) { + task->thread.regs->msr &= ~MSR_DE; + task->thread.debug.dbcr0 &= ~DBCR0_IDM; + } + return 0; + } + + /* Read or Write bits must be set */ + + if (!(data & 0x3UL)) + return -EINVAL; + + /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */ + task->thread.debug.dbcr0 |= DBCR0_IDM; + + /* Check for write and read flags and set DBCR0 accordingly */ + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (data & 0x1UL) + dbcr_dac(task) |= DBCR_DAC1R; + if (data & 0x2UL) + dbcr_dac(task) |= DBCR_DAC1W; + task->thread.regs->msr |= MSR_DE; + return 0; +} + +static long set_instruction_bp(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int slot; + int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + slot2_in_use = 1; + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + slot4_in_use = 1; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + /* Make sure range is valid. 
*/ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + + /* We need a pair of IAC regsisters */ + if (!slot1_in_use && !slot2_in_use) { + slot = 1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.iac2 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC1; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC12X; + else + dbcr_iac_range(child) |= DBCR_IAC12I; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if ((!slot3_in_use) && (!slot4_in_use)) { + slot = 3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.iac4 = bp_info->addr2; + child->thread.debug.dbcr0 |= DBCR0_IAC3; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC34X; + else + dbcr_iac_range(child) |= DBCR_IAC34I; +#endif + } else { + return -ENOSPC; + } + } else { + /* We only need one. If possible leave a pair free in + * case a range is needed later + */ + if (!slot1_in_use) { + /* + * Don't use iac1 if iac1-iac2 are free and either + * iac3 or iac4 (but not both) are free + */ + if (slot2_in_use || slot3_in_use == slot4_in_use) { + slot = 1; + child->thread.debug.iac1 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC1; + goto out; + } + } + if (!slot2_in_use) { + slot = 2; + child->thread.debug.iac2 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC2; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if (!slot3_in_use) { + slot = 3; + child->thread.debug.iac3 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC3; + } else if (!slot4_in_use) { + slot = 4; + child->thread.debug.iac4 = bp_info->addr; + child->thread.debug.dbcr0 |= DBCR0_IAC4; +#endif + } else { + return -ENOSPC; + } + } +out: + child->thread.debug.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot; +} + +static int del_instruction_bp(struct task_struct *child, int slot) +{ + switch (slot) { + case 1: + if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) + return -ENOENT; 
+ + if (dbcr_iac_range(child) & DBCR_IAC12MODE) { + /* address range - clear slots 1 & 2 */ + child->thread.debug.iac2 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC12MODE; + } + child->thread.debug.iac1 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC1; + break; + case 2: + if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + /* used in a range */ + return -EINVAL; + child->thread.debug.iac2 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 3: + if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) { + /* address range - clear slots 3 & 4 */ + child->thread.debug.iac4 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC34MODE; + } + child->thread.debug.iac3 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC3; + break; + case 4: + if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + /* Used in a range */ + return -EINVAL; + child->thread.debug.iac4 = 0; + child->thread.debug.dbcr0 &= ~DBCR0_IAC4; + break; +#endif + default: + return -EINVAL; + } + return 0; +} + +static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + int byte_enable = + (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) + & 0xf; + int condition_mode = + bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; + int slot; + + if (byte_enable && condition_mode == 0) + return -EINVAL; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { + slot = 1; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC1R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC1W; + child->thread.debug.dac1 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.debug.dvc1 = + 
(unsigned long)bp_info->condition_value; + child->thread.debug.dbcr2 |= + ((byte_enable << DBCR2_DVC1BE_SHIFT) | + (condition_mode << DBCR2_DVC1M_SHIFT)); + } +#endif +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + /* Both dac1 and dac2 are part of a range */ + return -ENOSPC; +#endif + } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { + slot = 2; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC2R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC2W; + child->thread.debug.dac2 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.debug.dvc2 = + (unsigned long)bp_info->condition_value; + child->thread.debug.dbcr2 |= + ((byte_enable << DBCR2_DVC2BE_SHIFT) | + (condition_mode << DBCR2_DVC2M_SHIFT)); + } +#endif + } else { + return -ENOSPC; + } + child->thread.debug.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot + 4; +} + +static int del_dac(struct task_struct *child, int slot) +{ + if (slot == 1) { + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) + return -ENOENT; + + child->thread.debug.dac1 = 0; + dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { + child->thread.debug.dac2 = 0; + child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; + } + child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.debug.dvc1 = 0; +#endif + } else if (slot == 2) { + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) + return -ENOENT; + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) + /* Part of a range */ + return -EINVAL; + child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.debug.dvc2 = 0; +#endif + 
child->thread.debug.dac2 = 0; + dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); + } else { + return -EINVAL; + } + + return 0; +} + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE +static int set_dac_range(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; + + /* We don't allow range watchpoints to be used with DVC */ + if (bp_info->condition_mode) + return -EINVAL; + + /* + * Best effort to verify the address range. The user/supervisor bits + * prevent trapping in kernel space, but let's fail on an obvious bad + * range. The simple test on the mask is not fool-proof, and any + * exclusive range will spill over into kernel space. + */ + if (bp_info->addr >= TASK_SIZE) + return -EIO; + if (mode == PPC_BREAKPOINT_MODE_MASK) { + /* + * dac2 is a bitmask. Don't allow a mask that makes a + * kernel space address from a valid dac1 value + */ + if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) + return -EIO; + } else { + /* + * For range breakpoints, addr2 must also be a valid address + */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + } + + if (child->thread.debug.dbcr0 & + (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) + return -ENOSPC; + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.debug.dac1 = bp_info->addr; + child->thread.debug.dac2 = bp_info->addr2; + if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + child->thread.debug.dbcr2 |= DBCR2_DAC12M; + else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + child->thread.debug.dbcr2 |= DBCR2_DAC12MX; + else /* PPC_BREAKPOINT_MODE_MASK */ + child->thread.debug.dbcr2 |= DBCR2_DAC12MM; + child->thread.regs->msr |= MSR_DE; + + return 5; +} +#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ + +long ppc_set_hwdebug(struct task_struct *child, struct 
ppc_hw_breakpoint *bp_info) +{ + if (bp_info->version != 1) + return -ENOTSUPP; + /* + * Check for invalid flags and combinations + */ + if (bp_info->trigger_type == 0 || + (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | + PPC_BREAKPOINT_TRIGGER_RW)) || + (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || + (bp_info->condition_mode & + ~(PPC_BREAKPOINT_CONDITION_MODE | + PPC_BREAKPOINT_CONDITION_BE_ALL))) + return -EINVAL; +#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 + if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; +#endif + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { + if (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE || + bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; + return set_instruction_bp(child, bp_info); + } + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + return set_dac(child, bp_info); + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + return set_dac_range(child, bp_info); +#else + return -EINVAL; +#endif +} + +long ppc_del_hwdebug(struct task_struct *child, long data) +{ + int rc; + + if (data <= 4) + rc = del_instruction_bp(child, (int)data); + else + rc = del_dac(child, (int)data - 4); + + if (!rc) { + if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, + child->thread.debug.dbcr1)) { + child->thread.debug.dbcr0 &= ~DBCR0_IDM; + child->thread.regs->msr &= ~MSR_DE; + } + } + return rc; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index e12f6615fc1d..bdba09a87aea 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -174,3 +174,8 @@ int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-view */ extern const struct user_regset_view user_ppc_native_view; + +/* ptrace-(no)adv */ +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data); +long ppc_set_hwdebug(struct task_struct *child, struct 
ppc_hw_breakpoint *bp_info); +long ppc_del_hwdebug(struct task_struct *child, long data); diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c new file mode 100644 index 000000000000..d4170932acb4 --- /dev/null +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include + +#include "ptrace-decl.h" + +void user_enable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + regs->msr &= ~MSR_BE; + regs->msr |= MSR_SE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) { + regs->msr &= ~MSR_SE; + regs->msr |= MSR_BE; + } + set_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +void user_disable_single_step(struct task_struct *task) +{ + struct pt_regs *regs = task->thread.regs; + + if (regs != NULL) + regs->msr &= ~(MSR_SE | MSR_BE); + + clear_tsk_thread_flag(task, TIF_SINGLESTEP); +} + +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret; + struct thread_struct *thread = &task->thread; + struct perf_event *bp; + struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + bool set_bp = true; + struct arch_hw_breakpoint hw_brk; + + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ + if (addr > 0) + return -EINVAL; + + /* The bottom 3 bits in dabr are flags */ + if ((data & ~0x7UL) >= TASK_SIZE) + return -EIO; + + /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. 
+ * It was assumed, on previous implementations, that 3 bits were + * passed together with the data address, fitting the design of the + * DABR register, as follows: + * + * bit 0: Read flag + * bit 1: Write flag + * bit 2: Breakpoint translation + * + * Thus, we use them here as so. + */ + + /* Ensure breakpoint translation bit is set */ + if (data && !(data & HW_BRK_TYPE_TRANSLATE)) + return -EIO; + hw_brk.address = data & (~HW_BRK_TYPE_DABR); + hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; + hw_brk.len = DABR_MAX_LEN; + hw_brk.hw_len = DABR_MAX_LEN; + set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); +#ifdef CONFIG_HAVE_HW_BREAKPOINT + bp = thread->ptrace_bps[0]; + if (!set_bp) { + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } + return 0; + } + if (bp) { + attr = bp->attr; + attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; + arch_bp_generic_fields(hw_brk.type, &attr.bp_type); + + /* Enable breakpoint */ + attr.disabled = false; + + ret = modify_user_hw_breakpoint(bp, &attr); + if (ret) + return ret; + + thread->ptrace_bps[0] = bp; + thread->hw_brk = hw_brk; + return 0; + } + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = hw_brk.address; + attr.bp_len = DABR_MAX_LEN; + arch_bp_generic_fields(hw_brk.type, + &attr.bp_type); + + thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, + ptrace_triggered, NULL, task); + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + return PTR_ERR(bp); + } + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + if (set_bp && (!ppc_breakpoint_available())) + return -ENODEV; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + task->thread.hw_brk = hw_brk; + return 0; +} + +long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int len = 0; + struct thread_struct *thread = &child->thread; + struct perf_event *bp; + struct perf_event_attr attr; 
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + struct arch_hw_breakpoint brk; + + if (bp_info->version != 1) + return -ENOTSUPP; + /* + * We only support one data breakpoint + */ + if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || + (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || + bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; + + if ((unsigned long)bp_info->addr >= TASK_SIZE) + return -EIO; + + brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; + brk.type = HW_BRK_TYPE_TRANSLATE; + brk.len = DABR_MAX_LEN; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + brk.type |= HW_BRK_TYPE_READ; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + brk.type |= HW_BRK_TYPE_WRITE; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + len = bp_info->addr2 - bp_info->addr; + else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + len = 1; + else + return -EINVAL; + bp = thread->ptrace_bps[0]; + if (bp) + return -ENOSPC; + + /* Create a new breakpoint request if one doesn't exist already */ + hw_breakpoint_init(&attr); + attr.bp_addr = (unsigned long)bp_info->addr; + attr.bp_len = len; + arch_bp_generic_fields(brk.type, &attr.bp_type); + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child); + thread->ptrace_bps[0] = bp; + if (IS_ERR(bp)) { + thread->ptrace_bps[0] = NULL; + return PTR_ERR(bp); + } + + return 1; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) + return -EINVAL; + + if (child->thread.hw_brk.address) + return -ENOSPC; + + if (!ppc_breakpoint_available()) + return -ENODEV; + + child->thread.hw_brk = brk; + + return 1; +} + +long ppc_del_hwdebug(struct task_struct *child, long data) +{ +#ifdef CONFIG_HAVE_HW_BREAKPOINT + int ret = 0; + struct thread_struct *thread = &child->thread; + struct perf_event *bp; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + if (data != 1) + return -EINVAL; + 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT + bp = thread->ptrace_bps[0]; + if (bp) { + unregister_hw_breakpoint(bp); + thread->ptrace_bps[0] = NULL; + } else { + ret = -ENOENT; + } + return ret; +#else /* CONFIG_HAVE_HW_BREAKPOINT */ + if (child->thread.hw_brk.address == 0) + return -ENOENT; + + child->thread.hw_brk.address = 0; + child->thread.hw_brk.type = 0; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + return 0; +} diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 95e66dad32e0..48e095e88a2f 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -31,71 +31,6 @@ #include "ptrace-decl.h" -void user_enable_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.debug.dbcr0 &= ~DBCR0_BT; - task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; -#else - regs->msr &= ~MSR_BE; - regs->msr |= MSR_SE; -#endif - } - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -void user_enable_block_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - task->thread.debug.dbcr0 &= ~DBCR0_IC; - task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; - regs->msr |= MSR_DE; -#else - regs->msr &= ~MSR_SE; - regs->msr |= MSR_BE; -#endif - } - set_tsk_thread_flag(task, TIF_SINGLESTEP); -} - -void user_disable_single_step(struct task_struct *task) -{ - struct pt_regs *regs = task->thread.regs; - - if (regs != NULL) { -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - /* - * The logic to disable single stepping should be as - * simple as turning off the Instruction Complete flag. - * And, after doing so, if all debug flags are off, turn - * off DBCR0(IDM) and MSR(DE) .... Torez - */ - task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT); - /* - * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. 
- */ - if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, - task->thread.debug.dbcr1)) { - /* - * All debug events were off..... - */ - task->thread.debug.dbcr0 &= ~DBCR0_IDM; - regs->msr &= ~MSR_DE; - } -#else - regs->msr &= ~(MSR_SE | MSR_BE); -#endif - } - clear_tsk_thread_flag(task, TIF_SINGLESTEP); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs) @@ -114,138 +49,6 @@ void ptrace_triggered(struct perf_event *bp, } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, - unsigned long data) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int ret; - struct thread_struct *thread = &(task->thread); - struct perf_event *bp; - struct perf_event_attr attr; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - bool set_bp = true; - struct arch_hw_breakpoint hw_brk; -#endif - - /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). - * For embedded processors we support one DAC and no IAC's at the - * moment. - */ - if (addr > 0) - return -EINVAL; - - /* The bottom 3 bits in dabr are flags */ - if ((data & ~0x7UL) >= TASK_SIZE) - return -EIO; - -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. - * It was assumed, on previous implementations, that 3 bits were - * passed together with the data address, fitting the design of the - * DABR register, as follows: - * - * bit 0: Read flag - * bit 1: Write flag - * bit 2: Breakpoint translation - * - * Thus, we use them here as so. 
- */ - - /* Ensure breakpoint translation bit is set */ - if (data && !(data & HW_BRK_TYPE_TRANSLATE)) - return -EIO; - hw_brk.address = data & (~HW_BRK_TYPE_DABR); - hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; - hw_brk.len = DABR_MAX_LEN; - hw_brk.hw_len = DABR_MAX_LEN; - set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); -#ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; - if (!set_bp) { - if (bp) { - unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; - } - return 0; - } - if (bp) { - attr = bp->attr; - attr.bp_addr = hw_brk.address; - attr.bp_len = DABR_MAX_LEN; - arch_bp_generic_fields(hw_brk.type, &attr.bp_type); - - /* Enable breakpoint */ - attr.disabled = false; - - ret = modify_user_hw_breakpoint(bp, &attr); - if (ret) { - return ret; - } - thread->ptrace_bps[0] = bp; - thread->hw_brk = hw_brk; - return 0; - } - - /* Create a new breakpoint request if one doesn't exist already */ - hw_breakpoint_init(&attr); - attr.bp_addr = hw_brk.address; - attr.bp_len = DABR_MAX_LEN; - arch_bp_generic_fields(hw_brk.type, - &attr.bp_type); - - thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, - ptrace_triggered, NULL, task); - if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; - return PTR_ERR(bp); - } - -#else /* !CONFIG_HAVE_HW_BREAKPOINT */ - if (set_bp && (!ppc_breakpoint_available())) - return -ENODEV; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - task->thread.hw_brk = hw_brk; -#else /* CONFIG_PPC_ADV_DEBUG_REGS */ - /* As described above, it was assumed 3 bits were passed with the data - * address, but we will assume only the mode bits will be passed - * as to not cause alignment restrictions for DAC-based processors. 
- */ - - /* DAC's hold the whole address without any mode flags */ - task->thread.debug.dac1 = data & ~0x3UL; - - if (task->thread.debug.dac1 == 0) { - dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); - if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, - task->thread.debug.dbcr1)) { - task->thread.regs->msr &= ~MSR_DE; - task->thread.debug.dbcr0 &= ~DBCR0_IDM; - } - return 0; - } - - /* Read or Write bits must be set */ - - if (!(data & 0x3UL)) - return -EINVAL; - - /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 - register */ - task->thread.debug.dbcr0 |= DBCR0_IDM; - - /* Check for write and read flags and set DBCR0 - accordingly */ - dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W); - if (data & 0x1UL) - dbcr_dac(task) |= DBCR_DAC1R; - if (data & 0x2UL) - dbcr_dac(task) |= DBCR_DAC1W; - task->thread.regs->msr |= MSR_DE; -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - return 0; -} - /* * Called by kernel/ptrace.c when detaching.. * @@ -257,459 +60,6 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -static long set_instruction_bp(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ - int slot; - int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0); - int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0); - int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0); - int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0); - - if (dbcr_iac_range(child) & DBCR_IAC12MODE) - slot2_in_use = 1; - if (dbcr_iac_range(child) & DBCR_IAC34MODE) - slot4_in_use = 1; - - if (bp_info->addr >= TASK_SIZE) - return -EIO; - - if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { - - /* Make sure range is valid. 
*/ - if (bp_info->addr2 >= TASK_SIZE) - return -EIO; - - /* We need a pair of IAC regsisters */ - if ((!slot1_in_use) && (!slot2_in_use)) { - slot = 1; - child->thread.debug.iac1 = bp_info->addr; - child->thread.debug.iac2 = bp_info->addr2; - child->thread.debug.dbcr0 |= DBCR0_IAC1; - if (bp_info->addr_mode == - PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - dbcr_iac_range(child) |= DBCR_IAC12X; - else - dbcr_iac_range(child) |= DBCR_IAC12I; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - } else if ((!slot3_in_use) && (!slot4_in_use)) { - slot = 3; - child->thread.debug.iac3 = bp_info->addr; - child->thread.debug.iac4 = bp_info->addr2; - child->thread.debug.dbcr0 |= DBCR0_IAC3; - if (bp_info->addr_mode == - PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - dbcr_iac_range(child) |= DBCR_IAC34X; - else - dbcr_iac_range(child) |= DBCR_IAC34I; -#endif - } else - return -ENOSPC; - } else { - /* We only need one. If possible leave a pair free in - * case a range is needed later - */ - if (!slot1_in_use) { - /* - * Don't use iac1 if iac1-iac2 are free and either - * iac3 or iac4 (but not both) are free - */ - if (slot2_in_use || (slot3_in_use == slot4_in_use)) { - slot = 1; - child->thread.debug.iac1 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC1; - goto out; - } - } - if (!slot2_in_use) { - slot = 2; - child->thread.debug.iac2 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC2; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - } else if (!slot3_in_use) { - slot = 3; - child->thread.debug.iac3 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC3; - } else if (!slot4_in_use) { - slot = 4; - child->thread.debug.iac4 = bp_info->addr; - child->thread.debug.dbcr0 |= DBCR0_IAC4; -#endif - } else - return -ENOSPC; - } -out: - child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; - - return slot; -} - -static int del_instruction_bp(struct task_struct *child, int slot) -{ - switch (slot) { - case 1: - if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0) - return -ENOENT; - - 
if (dbcr_iac_range(child) & DBCR_IAC12MODE) { - /* address range - clear slots 1 & 2 */ - child->thread.debug.iac2 = 0; - dbcr_iac_range(child) &= ~DBCR_IAC12MODE; - } - child->thread.debug.iac1 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC1; - break; - case 2: - if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC12MODE) - /* used in a range */ - return -EINVAL; - child->thread.debug.iac2 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC2; - break; -#if CONFIG_PPC_ADV_DEBUG_IACS > 2 - case 3: - if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC34MODE) { - /* address range - clear slots 3 & 4 */ - child->thread.debug.iac4 = 0; - dbcr_iac_range(child) &= ~DBCR_IAC34MODE; - } - child->thread.debug.iac3 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC3; - break; - case 4: - if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0) - return -ENOENT; - - if (dbcr_iac_range(child) & DBCR_IAC34MODE) - /* Used in a range */ - return -EINVAL; - child->thread.debug.iac4 = 0; - child->thread.debug.dbcr0 &= ~DBCR0_IAC4; - break; -#endif - default: - return -EINVAL; - } - return 0; -} - -static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) -{ - int byte_enable = - (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) - & 0xf; - int condition_mode = - bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; - int slot; - - if (byte_enable && (condition_mode == 0)) - return -EINVAL; - - if (bp_info->addr >= TASK_SIZE) - return -EIO; - - if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { - slot = 1; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - dbcr_dac(child) |= DBCR_DAC1R; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - dbcr_dac(child) |= DBCR_DAC1W; - child->thread.debug.dac1 = (unsigned long)bp_info->addr; -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - if (byte_enable) { - child->thread.debug.dvc1 = - (unsigned 
long)bp_info->condition_value; - child->thread.debug.dbcr2 |= - ((byte_enable << DBCR2_DVC1BE_SHIFT) | - (condition_mode << DBCR2_DVC1M_SHIFT)); - } -#endif -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { - /* Both dac1 and dac2 are part of a range */ - return -ENOSPC; -#endif - } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { - slot = 2; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - dbcr_dac(child) |= DBCR_DAC2R; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - dbcr_dac(child) |= DBCR_DAC2W; - child->thread.debug.dac2 = (unsigned long)bp_info->addr; -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - if (byte_enable) { - child->thread.debug.dvc2 = - (unsigned long)bp_info->condition_value; - child->thread.debug.dbcr2 |= - ((byte_enable << DBCR2_DVC2BE_SHIFT) | - (condition_mode << DBCR2_DVC2M_SHIFT)); - } -#endif - } else - return -ENOSPC; - child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; - - return slot + 4; -} - -static int del_dac(struct task_struct *child, int slot) -{ - if (slot == 1) { - if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) - return -ENOENT; - - child->thread.debug.dac1 = 0; - dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) { - child->thread.debug.dac2 = 0; - child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; - } - child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); -#endif -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.debug.dvc1 = 0; -#endif - } else if (slot == 2) { - if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) - return -ENOENT; - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) - /* Part of a range */ - return -EINVAL; - child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); -#endif -#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - child->thread.debug.dvc2 = 0; -#endif - 
child->thread.debug.dac2 = 0; - dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); - } else - return -EINVAL; - - return 0; -} -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE -static int set_dac_range(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ - int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; - - /* We don't allow range watchpoints to be used with DVC */ - if (bp_info->condition_mode) - return -EINVAL; - - /* - * Best effort to verify the address range. The user/supervisor bits - * prevent trapping in kernel space, but let's fail on an obvious bad - * range. The simple test on the mask is not fool-proof, and any - * exclusive range will spill over into kernel space. - */ - if (bp_info->addr >= TASK_SIZE) - return -EIO; - if (mode == PPC_BREAKPOINT_MODE_MASK) { - /* - * dac2 is a bitmask. Don't allow a mask that makes a - * kernel space address from a valid dac1 value - */ - if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) - return -EIO; - } else { - /* - * For range breakpoints, addr2 must also be a valid address - */ - if (bp_info->addr2 >= TASK_SIZE) - return -EIO; - } - - if (child->thread.debug.dbcr0 & - (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) - return -ENOSPC; - - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) - child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); - child->thread.debug.dac1 = bp_info->addr; - child->thread.debug.dac2 = bp_info->addr2; - if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - child->thread.debug.dbcr2 |= DBCR2_DAC12M; - else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) - child->thread.debug.dbcr2 |= DBCR2_DAC12MX; - else /* PPC_BREAKPOINT_MODE_MASK */ - child->thread.debug.dbcr2 |= DBCR2_DAC12MM; - child->thread.regs->msr |= MSR_DE; - - return 5; -} -#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ - -static long 
ppc_set_hwdebug(struct task_struct *child, - struct ppc_hw_breakpoint *bp_info) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int len = 0; - struct thread_struct *thread = &(child->thread); - struct perf_event *bp; - struct perf_event_attr attr; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - struct arch_hw_breakpoint brk; -#endif - - if (bp_info->version != 1) - return -ENOTSUPP; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - /* - * Check for invalid flags and combinations - */ - if ((bp_info->trigger_type == 0) || - (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | - PPC_BREAKPOINT_TRIGGER_RW)) || - (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || - (bp_info->condition_mode & - ~(PPC_BREAKPOINT_CONDITION_MODE | - PPC_BREAKPOINT_CONDITION_BE_ALL))) - return -EINVAL; -#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 - if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) - return -EINVAL; -#endif - - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { - if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) || - (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) - return -EINVAL; - return set_instruction_bp(child, bp_info); - } - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) - return set_dac(child, bp_info); - -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - return set_dac_range(child, bp_info); -#else - return -EINVAL; -#endif -#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */ - /* - * We only support one data breakpoint - */ - if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || - (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || - bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) - return -EINVAL; - - if ((unsigned long)bp_info->addr >= TASK_SIZE) - return -EIO; - - brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN; - brk.type = HW_BRK_TYPE_TRANSLATE; - brk.len = DABR_MAX_LEN; - if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) - brk.type |= HW_BRK_TYPE_READ; - if (bp_info->trigger_type & 
PPC_BREAKPOINT_TRIGGER_WRITE) - brk.type |= HW_BRK_TYPE_WRITE; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) - len = bp_info->addr2 - bp_info->addr; - else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) - len = 1; - else - return -EINVAL; - bp = thread->ptrace_bps[0]; - if (bp) - return -ENOSPC; - - /* Create a new breakpoint request if one doesn't exist already */ - hw_breakpoint_init(&attr); - attr.bp_addr = (unsigned long)bp_info->addr; - attr.bp_len = len; - arch_bp_generic_fields(brk.type, &attr.bp_type); - - thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, - ptrace_triggered, NULL, child); - if (IS_ERR(bp)) { - thread->ptrace_bps[0] = NULL; - return PTR_ERR(bp); - } - - return 1; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - - if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) - return -EINVAL; - - if (child->thread.hw_brk.address) - return -ENOSPC; - - if (!ppc_breakpoint_available()) - return -ENODEV; - - child->thread.hw_brk = brk; - - return 1; -#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ -} - -static long ppc_del_hwdebug(struct task_struct *child, long data) -{ -#ifdef CONFIG_HAVE_HW_BREAKPOINT - int ret = 0; - struct thread_struct *thread = &(child->thread); - struct perf_event *bp; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - int rc; - - if (data <= 4) - rc = del_instruction_bp(child, (int)data); - else - rc = del_dac(child, (int)data - 4); - - if (!rc) { - if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, - child->thread.debug.dbcr1)) { - child->thread.debug.dbcr0 &= ~DBCR0_IDM; - child->thread.regs->msr &= ~MSR_DE; - } - } - return rc; -#else - if (data != 1) - return -EINVAL; - -#ifdef CONFIG_HAVE_HW_BREAKPOINT - bp = thread->ptrace_bps[0]; - if (bp) { - unregister_hw_breakpoint(bp); - thread->ptrace_bps[0] = NULL; - } else - ret = -ENOENT; - return ret; -#else /* CONFIG_HAVE_HW_BREAKPOINT */ - if (child->thread.hw_brk.address == 0) - return -ENOENT; 
- - child->thread.hw_brk.address = 0; - child->thread.hw_brk.type = 0; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - - return 0; -#endif -} - long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { -- cgit v1.2.3-59-g8ed1b From e08227d25a26901d13c6b597e60dffba40e2659d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:47 +0000 Subject: powerpc/ptrace: create ptrace_get_debugreg() Create ptrace_get_debugreg() to handle PTRACE_GET_DEBUGREG and reduce ifdef mess Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c1482c41a39cc216f4073a51070d8680f52d5054.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/ptrace-adv.c | 9 +++++++++ arch/powerpc/kernel/ptrace/ptrace-decl.h | 2 ++ arch/powerpc/kernel/ptrace/ptrace-noadv.c | 13 +++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 18 ++---------------- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c index eebcd41edc3d..c71bedd54a8b 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-adv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c @@ -56,6 +56,15 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } +int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, + unsigned long __user *datalp) +{ + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + return -EINVAL; + return put_user(child->thread.debug.dac1, datalp); +} + int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index bdba09a87aea..4b4b6a1d508a 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -176,6 +176,8 @@ int tm_cgpr32_set(struct 
task_struct *target, const struct user_regset *regset, extern const struct user_regset_view user_ppc_native_view; /* ptrace-(no)adv */ +int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, + unsigned long __user *datalp); int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data); long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info); long ppc_del_hwdebug(struct task_struct *child, long data); diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index d4170932acb4..a6ad492badc6 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -39,6 +39,19 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } +int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, + unsigned long __user *datalp) +{ + unsigned long dabr_fake; + + /* We only support one DABR and no IABRS at the moment */ + if (addr > 0) + return -EINVAL; + dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) | + (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); + return put_user(dabr_fake, datalp); +} + int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 48e095e88a2f..d6e1a301d20e 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -191,23 +191,9 @@ long arch_ptrace(struct task_struct *child, long request, break; } - case PTRACE_GET_DEBUGREG: { -#ifndef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long dabr_fake; -#endif - ret = -EINVAL; - /* We only support one DABR and no IABRS at the moment */ - if (addr > 0) - break; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.debug.dac1, datalp); -#else - dabr_fake = ((child->thread.hw_brk.address & 
(~HW_BRK_TYPE_DABR)) | - (child->thread.hw_brk.type & HW_BRK_TYPE_DABR)); - ret = put_user(dabr_fake, datalp); -#endif + case PTRACE_GET_DEBUGREG: + ret = ptrace_get_debugreg(child, addr, datalp); break; - } case PTRACE_SET_DEBUGREG: ret = ptrace_set_debugreg(child, addr, data); -- cgit v1.2.3-59-g8ed1b From da529d4739abbb7681b1804c5b9bf615a5796f43 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:48 +0000 Subject: powerpc/ptrace: create ppc_gethwdinfo() Create ppc_gethwdinfo() to handle PPC_PTRACE_GETHWDBGINFO and reduce ifdef mess Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/82fefcc1ec75b96cece792878217a5d85ecda0c2.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/ptrace/ptrace-adv.c | 15 +++++++++++++++ arch/powerpc/kernel/ptrace/ptrace-decl.h | 1 + arch/powerpc/kernel/ptrace/ptrace-noadv.c | 20 +++++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 32 +------------------------------ 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c index c71bedd54a8b..3990c01ef8cf 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-adv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c @@ -56,6 +56,21 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } +void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) +{ + dbginfo->version = 1; + dbginfo->num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; + dbginfo->num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; + dbginfo->num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; + dbginfo->data_bp_alignment = 4; + dbginfo->sizeof_condition = 4; + dbginfo->features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | + PPC_DEBUG_FEATURE_INSN_BP_MASK; + if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_DAC_RANGE)) + dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE | + PPC_DEBUG_FEATURE_DATA_BP_MASK; +} + int ptrace_get_debugreg(struct task_struct
*child, unsigned long addr, unsigned long __user *datalp) { diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 4b4b6a1d508a..3c8a81999292 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -176,6 +176,7 @@ int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, extern const struct user_regset_view user_ppc_native_view; /* ptrace-(no)adv */ +void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, unsigned long __user *datalp); int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data); diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index a6ad492badc6..f87e7c5c3bf3 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -39,6 +39,26 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } +void ppc_gethwdinfo(struct ppc_debug_info *dbginfo) +{ + dbginfo->version = 1; + dbginfo->num_instruction_bps = 0; + if (ppc_breakpoint_available()) + dbginfo->num_data_bps = 1; + else + dbginfo->num_data_bps = 0; + dbginfo->num_condition_regs = 0; + dbginfo->data_bp_alignment = sizeof(long); + dbginfo->sizeof_condition = 0; + if (IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) { + dbginfo->features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; + if (dawr_enabled()) + dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; + } else { + dbginfo->features = 0; + } +} + int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, unsigned long __user *datalp) { diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index d6e1a301d20e..a44f6e5e05ff 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -139,37 +139,7 @@ long arch_ptrace(struct task_struct 
*child, long request, case PPC_PTRACE_GETHWDBGINFO: { struct ppc_debug_info dbginfo; - dbginfo.version = 1; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; - dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; - dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; - dbginfo.data_bp_alignment = 4; - dbginfo.sizeof_condition = 4; - dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | - PPC_DEBUG_FEATURE_INSN_BP_MASK; -#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE - dbginfo.features |= - PPC_DEBUG_FEATURE_DATA_BP_RANGE | - PPC_DEBUG_FEATURE_DATA_BP_MASK; -#endif -#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ - dbginfo.num_instruction_bps = 0; - if (ppc_breakpoint_available()) - dbginfo.num_data_bps = 1; - else - dbginfo.num_data_bps = 0; - dbginfo.num_condition_regs = 0; - dbginfo.data_bp_alignment = sizeof(long); - dbginfo.sizeof_condition = 0; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; - if (dawr_enabled()) - dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; -#else - dbginfo.features = 0; -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + ppc_gethwdinfo(&dbginfo); if (copy_to_user(datavp, &dbginfo, sizeof(struct ppc_debug_info))) -- cgit v1.2.3-59-g8ed1b From ccbed90b8207851fa759e81cc9ab083f9b6496c1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 28 Feb 2020 00:14:49 +0000 Subject: powerpc/ptrace: move ptrace_triggered() into hw_breakpoint.c ptrace_triggered() is declared in asm/hw_breakpoint.h and only needed when CONFIG_HW_BREAKPOINT is set, so move it into hw_breakpoint.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8402c516023da1371953a65af7df2008758ea0c4.1582848567.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/hw_breakpoint.c | 16 ++++++++++++++++ arch/powerpc/kernel/ptrace/ptrace.c | 19 ------------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git 
a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index d0854320bb50..72f461bd70fb 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -429,3 +429,19 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) { /* TODO */ } + +void ptrace_triggered(struct perf_event *bp, + struct perf_sample_data *data, struct pt_regs *regs) +{ + struct perf_event_attr attr; + + /* + * Disable the breakpoint request here since ptrace has defined a + * one-shot behaviour for breakpoint exceptions in PPC64. + * The SIGTRAP signal is generated automatically for us in do_dabr(). + * We don't have to do anything about that here + */ + attr = bp->attr; + attr.disabled = true; + modify_user_hw_breakpoint(bp, &attr); +} diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index a44f6e5e05ff..f6e51be47c6e 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -31,24 +30,6 @@ #include "ptrace-decl.h" -#ifdef CONFIG_HAVE_HW_BREAKPOINT -void ptrace_triggered(struct perf_event *bp, - struct perf_sample_data *data, struct pt_regs *regs) -{ - struct perf_event_attr attr; - - /* - * Disable the breakpoint request here since ptrace has defined a - * one-shot behaviour for breakpoint exceptions in PPC64. - * The SIGTRAP signal is generated automatically for us in do_dabr(). - * We don't have to do anything about that here - */ - attr = bp->attr; - attr.disabled = true; - modify_user_hw_breakpoint(bp, &attr); -} -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ - /* * Called by kernel/ptrace.c when detaching.. 
* -- cgit v1.2.3-59-g8ed1b From ead983604c5a390f1e3ce085945b60e82f08dbbe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 27 Feb 2020 15:59:32 +1100 Subject: powerpc/vmlinux.lds: Explicitly retain .gnu.hash Relocatable kernel builds produce a warning about .gnu.hash being an orphan section: ld: warning: orphan section `.gnu.hash' from `linker stubs' being placed in section `.gnu.hash' If we try to discard it the build fails: ld -EL -m elf64lppc -pie --orphan-handling=warn --build-id -o .tmp_vmlinux1 -T ./arch/powerpc/kernel/vmlinux.lds --whole-archive arch/powerpc/kernel/head_64.o arch/powerpc/kernel/entry_64.o ... sound/built-in.a net/built-in.a virt/built-in.a --no-whole-archive --start-group lib/lib.a --end-group ld: could not find section .gnu.hash So add an entry to explicitly retain it, as we do for .hash. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200227045933.22967-1-mpe@ellerman.id.au --- arch/powerpc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a32d478a7f41..b17d665a4e07 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -256,6 +256,7 @@ SECTIONS *(.dynamic) } .hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) } + .gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) } .interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) } .rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET) { -- cgit v1.2.3-59-g8ed1b From 9686813f6e9d5568bc045de0be853411e44958c8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 27 Mar 2020 20:53:19 +1100 Subject: selftests/powerpc: Fix try-run when source tree is not writable We added a usage of try-run to pmu/ebb/Makefile to detect if the toolchain supported the -no-pie option. This fails if we build out-of-tree and the source tree is not writable, as try-run tries to write its temporary files to the current directory. 
That leads to the -no-pie option being silently dropped, which leads to broken executables with some toolchains. If we remove the redirect to /dev/null in try-run, we see the error: make[3]: Entering directory '/linux/tools/testing/selftests/powerpc/pmu/ebb' /usr/bin/ld: cannot open output file .54.tmp: Read-only file system collect2: error: ld returned 1 exit status make[3]: Nothing to be done for 'all'. And looking with strace we see it's trying to use a file that's in the source tree: lstat("/linux/tools/testing/selftests/powerpc/pmu/ebb/.54.tmp", 0x7ffffc0f83c8) We can fix it by setting TMPOUT to point to the $(OUTPUT) directory, and we can verify with strace it's now trying to write to the output directory: lstat("/output/kselftest/powerpc/pmu/ebb/.54.tmp", 0x7fffd1bf6bf8) And also see that the -no-pie option is now correctly detected. Fixes: 0695f8bca93e ("selftests/powerpc: Handle Makefile for unrecognized option") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200327095319.2347641-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/pmu/ebb/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 417306353e07..ca35dd8848b0 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -7,6 +7,7 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 +TMPOUT = $(OUTPUT)/ # Toolchains may build PIE by default which breaks the assembly no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) -- cgit v1.2.3-59-g8ed1b From 32377bd2cbb62e23ac0a1aaaf0048957c5fd9f02 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Tue, 26 Nov 2019 19:21:14 +0530 Subject: powerpc/powernv: Add documentation for the opal 
sensor_groups sysfs interfaces Commit bf9571550f52 ("powerpc/powernv: Add support to clear sensor groups data") added a mechanism to clear sensor-group data via a sysfs interface. However, the ABI for that interface has not been documented. This patch documents the ABI for the sysfs interface for sensor-groups and clearing the sensor-groups. This patch was originally sent by Shilpasri G Bhat on the mailing list: https://lkml.org/lkml/2018/8/1/85 Signed-off-by: Shilpasri G Bhat Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1574776274-22355-1-git-send-email-ego@linux.vnet.ibm.com --- .../ABI/testing/sysfs-firmware-opal-sensor-groups | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups new file mode 100644 index 000000000000..3a2dfe542e8c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups @@ -0,0 +1,21 @@ +What: /sys/firmware/opal/sensor_groups +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: Sensor groups directory for POWER9 powernv servers + + Each folder in this directory contains a sensor group + which are classified based on type of the sensor + like power, temperature, frequency, current, etc. They + can also indicate the group of sensors belonging to + different owners like CSM, Profiler, Job-Scheduler + +What: /sys/firmware/opal/sensor_groups/<sensor_group_name>/clear +Date: August 2017 +Contact: Linux for PowerPC mailing list +Description: Sysfs file to clear the min-max of all the sensors + belonging to the group. + + Writing 1 to this file will clear the minimum and + maximum values of all the sensors in the group. + In POWER9, the min-max of a sensor is the historical minimum + and maximum value of the sensor cached by OCC.
-- cgit v1.2.3-59-g8ed1b From b77afad84e1eedca03658ae1478ce5b8ed5aa18c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 25 Nov 2019 11:20:33 +0200 Subject: powerpc/32: drop unused ISA_DMA_THRESHOLD The ISA_DMA_THRESHOLD variable is set by several platforms but never referenced. Remove it. Signed-off-by: Mike Rapoport Reviewed-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191125092033.20014-1-rppt@kernel.org --- arch/powerpc/include/asm/dma.h | 3 +-- arch/powerpc/kernel/setup_32.c | 1 - arch/powerpc/platforms/44x/warp.c | 3 --- arch/powerpc/platforms/52xx/efika.c | 1 - arch/powerpc/platforms/amigaone/setup.c | 1 - arch/powerpc/platforms/chrp/setup.c | 1 - arch/powerpc/platforms/powermac/setup.c | 1 - 7 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/dma.h b/arch/powerpc/include/asm/dma.h index 1b4f0254868f..6161a9596196 100644 --- a/arch/powerpc/include/asm/dma.h +++ b/arch/powerpc/include/asm/dma.h @@ -151,10 +151,9 @@ #define DMA2_EXT_REG 0x4D6 #ifndef __powerpc64__ - /* in arch/ppc/kernel/setup.c -- Cort */ + /* in arch/powerpc/kernel/setup_32.c -- Cort */ extern unsigned int DMA_MODE_WRITE; extern unsigned int DMA_MODE_READ; - extern unsigned long ISA_DMA_THRESHOLD; #else #define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ #define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5b49b26eb154..305ca89d856f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid_phys); int smp_hw_index[NR_CPUS]; EXPORT_SYMBOL(smp_hw_index); -unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 6620b64e4963..665f18e37efb 100644 --- 
a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -43,9 +43,6 @@ static int __init warp_probe(void) if (!of_machine_is_compatible("pika,warp")) return 0; - /* For arch_dma_alloc */ - ISA_DMA_THRESHOLD = ~0L; - return 1; } diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 61538869e88a..4514a6f7458a 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -205,7 +205,6 @@ static int __init efika_probe(void) if (strcmp(model, "EFIKA5K2")) return 0; - ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index ea5e45e32683..f5d0bf999759 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -146,7 +146,6 @@ static int __init amigaone_probe(void) */ cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT; - ISA_DMA_THRESHOLD = 0x00ffffff; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 8328cd5817b0..65a7e01a8f7d 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -569,7 +569,6 @@ static int __init chrp_probe(void) if (strcmp(dtype, "chrp")) return 0; - ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index c6d5333729ed..95fb4feb6ccc 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -586,7 +586,6 @@ static int __init pmac_probe(void) #ifdef CONFIG_PPC32 /* isa_io_base gets set in pmac_pci_init */ - ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 1; DMA_MODE_WRITE = 2; #endif /* CONFIG_PPC32 */ -- cgit v1.2.3-59-g8ed1b From c04868df38d8d6239ef0f36f45dbba2624e6a9cb Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Thu, 5 Mar 2020 20:05:29 +0530 Subject: powerpc: Drop -fno-dwarf2-cfi-asm The original commit/discussion adding -fno-dwarf2-cfi-asm refers to R_PPC64_REL32 relocations not being handled by our module loader: http://lkml.kernel.org/r/20090224065112.GA6690@bombadil.infradead.org However, that is now handled thanks to commit 9f751b82b491d ("powerpc/module: Add support for R_PPC64_REL32 relocations"). So, drop this flag from our Makefile. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9b22a064de6eb1301d92177eb3a38559df7005d3.1583415544.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/Makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index cbe5ca4f0ee5..89956c4f1ce3 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -239,11 +239,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx) KBUILD_CFLAGS += $(call cc-option,-mno-spe) KBUILD_CFLAGS += $(call cc-option,-mspe=no) -# FIXME: the module load should be taught about the additional relocs -# generated by this. -# revert to pre-gcc-4.4 behaviour of .eh_frame -KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) - # Never use string load/store instructions as they are # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -- cgit v1.2.3-59-g8ed1b From ba96301ce9be7925cdaee677b1a2ff8eddba9fd4 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 5 Mar 2020 20:05:30 +0530 Subject: powerpc: Suppress .eh_frame generation GCC v8 defaults to enabling -fasynchronous-unwind-tables due to https://gcc.gnu.org/r259298, which results in .eh_frame section being generated. This results in additional disk usage by the build, as well as the kernel modules. Since the kernel has no use for this, this section is discarded. Add -fno-asynchronous-unwind-tables to KBUILD_CFLAGS to suppress generation of .eh_frame section. 
Note that our VDSOs need .eh_frame, but are not affected by this change since our VDSO code is all in assembly. Reported-by: Rasmus Villemoes Signed-off-by: Naveen N. Rao Reviewed-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1ed7cd84a7d1a3180b30c0c60e70eed8bb8b40c3.1583415544.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 89956c4f1ce3..f310c32e88a4 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -239,6 +239,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx) KBUILD_CFLAGS += $(call cc-option,-mno-spe) KBUILD_CFLAGS += $(call cc-option,-mspe=no) +# Don't emit .eh_frame since we have no use for it +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables + # Never use string load/store instructions as they are # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -- cgit v1.2.3-59-g8ed1b From 41b8426fdb59218f56a6e3b3facd43a82816e3eb Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Sat, 15 Feb 2020 02:36:37 -0300 Subject: powerpc/cputable: Remove unnecessary copy of cpu_spec->oprofile_type Before checking for cpu_type == NULL, this same copy happens, so doing it here will just write the same value to the t->oprofile_type again. Remove the repeated copy, as it is unnecessary. 
Signed-off-by: Leonardo Bras Reviewed-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200215053637.280880-1-leonardo@linux.ibm.com --- arch/powerpc/kernel/cputable.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 245be4fafe13..13eba2eb46fe 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2198,7 +2198,6 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; - t->oprofile_type = old.oprofile_type; t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } -- cgit v1.2.3-59-g8ed1b From c17eb4dca5a353a9dbbb8ad6934fe57af7165e91 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Mon, 30 Mar 2020 10:03:56 +0200 Subject: powerpc: Make setjmp/longjmp signature standard Declaring setjmp()/longjmp() as taking longs makes the signature non-standard, and makes clang complain. In the past, this has been worked around by adding -ffreestanding to the compile flags. The implementation looks like it only ever propagates the value (in longjmp) or sets it to 1 (in setjmp), and we only call longjmp with integer parameters. This allows removing -ffreestanding from the compilation flags. 
Fixes: c9029ef9c957 ("powerpc: Avoid clang warnings around setjmp and longjmp") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Clement Courbet Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200330080400.124803-1-courbet@google.com --- arch/powerpc/include/asm/setjmp.h | 6 ++++-- arch/powerpc/kexec/Makefile | 3 --- arch/powerpc/xmon/Makefile | 3 --- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h index e9f81bb3f83b..f798e80e4106 100644 --- a/arch/powerpc/include/asm/setjmp.h +++ b/arch/powerpc/include/asm/setjmp.h @@ -7,7 +7,9 @@ #define JMP_BUF_LEN 23 -extern long setjmp(long *) __attribute__((returns_twice)); -extern void longjmp(long *, long) __attribute__((noreturn)); +typedef long jmp_buf[JMP_BUF_LEN]; + +extern int setjmp(jmp_buf env) __attribute__((returns_twice)); +extern void longjmp(jmp_buf env, int val) __attribute__((noreturn)); #endif /* _ASM_POWERPC_SETJMP_H */ diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 378f6108a414..86380c69f5ce 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -3,9 +3,6 @@ # Makefile for the linux kernel. # -# Avoid clang warnings around longjmp/setjmp declarations -CFLAGS_crash.o += -ffreestanding - obj-y += core.o crash.o core_$(BITS).o obj-$(CONFIG_PPC32) += relocate_32.o diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index c3842dbeb1b7..6f9cccea54f3 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Avoid clang warnings around longjmp/setjmp declarations -subdir-ccflags-y := -ffreestanding - GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n -- cgit v1.2.3-59-g8ed1b