aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig27
-rw-r--r--arch/powerpc/platforms/pseries/Makefile12
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c64
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c3
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c88
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c11
-rw-r--r--arch/powerpc/platforms/pseries/event_sources.c2
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c26
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c1
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S4
-rw-r--r--arch/powerpc/platforms/pseries/hvcserver.c2
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c6
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c102
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c10
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c86
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c104
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c115
-rw-r--r--arch/powerpc/platforms/pseries/msi.c39
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c2
-rw-r--r--arch/powerpc/platforms/pseries/papr_platform_attributes.c362
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c285
-rw-r--r--arch/powerpc/platforms/pseries/pci.c1
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c4
-rw-r--r--arch/powerpc/platforms/pseries/plpks.c461
-rw-r--r--arch/powerpc/platforms/pseries/plpks.h71
-rw-r--r--arch/powerpc/platforms/pseries/pmem.c1
-rw-r--r--arch/powerpc/platforms/pseries/power.c2
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h11
-rw-r--r--arch/powerpc/platforms/pseries/ras.c70
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c6
-rw-r--r--arch/powerpc/platforms/pseries/rng.c11
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.c23
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c195
-rw-r--r--arch/powerpc/platforms/pseries/setup.c110
-rw-r--r--arch/powerpc/platforms/pseries/smp.c1
-rw-r--r--arch/powerpc/platforms/pseries/svm.c26
-rw-r--r--arch/powerpc/platforms/pseries/vas-sysfs.c281
-rw-r--r--arch/powerpc/platforms/pseries/vas.c578
-rw-r--r--arch/powerpc/platforms/pseries/vas.h36
-rw-r--r--arch/powerpc/platforms/pseries/vio.c7
41 files changed, 2674 insertions, 574 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 2e57391e0778..a3b4d99567cb 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -17,21 +17,27 @@ config PPC_PSERIES
select PPC_RTAS_DAEMON
select RTAS_ERROR_LOGGING
select PPC_UDBG_16550
- select PPC_NATIVE
select PPC_DOORBELL
select HOTPLUG_CPU
- select ARCH_RANDOM
select FORCE_SMP
select SWIOTLB
default y
+config PARAVIRT
+ bool
+
config PARAVIRT_SPINLOCKS
bool
+config PARAVIRT_TIME_ACCOUNTING
+ select PARAVIRT
+ bool
+
config PPC_SPLPAR
bool "Support for shared-processor logical partitions"
depends on PPC_PSERIES
select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+ select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN
default y
help
Enabling this option will make the kernel run more efficiently
@@ -61,10 +67,6 @@ config PSERIES_ENERGY
Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
-config SCANLOG
- tristate "Scanlog dump interface"
- depends on RTAS_PROC && PPC_PSERIES
-
config IO_EVENT_IRQ
bool "IO Event Interrupt support"
depends on PPC_PSERIES
@@ -147,6 +149,19 @@ config IBMEBUS
help
Bus device driver for GX bus based adapters.
+config PSERIES_PLPKS
+ depends on PPC_PSERIES
+ bool "Support for the Platform Key Storage"
+ help
+ PowerVM provides an isolated Platform Keystore(PKS) storage
+ allocation for each LPAR with individually managed access
+ controls to store sensitive information securely. It can be
+ used to store asymmetric public keys or secrets as required
+ by different usecases. Select this config to enable
+ operating system interface to hypervisor to access this space.
+
+ If unsure, select N.
+
config PAPR_SCM
depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM
tristate "Support for the PAPR Storage Class Memory interface"
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 41d8aee98da4..92310202bdd7 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,9 +6,9 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
of_helpers.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o rng.o \
- pci.o pci_dlpar.o eeh_pseries.o msi.o
+ pci.o pci_dlpar.o eeh_pseries.o msi.o \
+ papr_platform_attributes.o dtl.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SCANLOG) += scanlog.o
obj-$(CONFIG_KEXEC_CORE) += kexec.o
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
@@ -19,7 +19,6 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
obj-$(CONFIG_HVCS) += hvcserver.o
obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
obj-$(CONFIG_CMM) += cmm.o
-obj-$(CONFIG_DTL) += dtl.o
obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
@@ -28,8 +27,13 @@ obj-$(CONFIG_PAPR_SCM) += papr_scm.o
obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_PPC_SVM) += svm.o
obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
+obj-$(CONFIG_PSERIES_PLPKS) += plpks.o
obj-$(CONFIG_SUSPEND) += suspend.o
-obj-$(CONFIG_PPC_VAS) += vas.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o
obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
+
+# nothing that operates in real mode is safe for KASAN
+KASAN_SANITIZE_ras.o := n
+KASAN_SANITIZE_kexec.o := n
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 45a3a3022a85..5f4037c1d7fe 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -19,9 +19,6 @@
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/device.h>
-#include <linux/mount.h>
-#include <linux/pseudo_fs.h>
-#include <linux/magic.h>
#include <linux/balloon_compaction.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
@@ -475,8 +472,6 @@ static struct notifier_block cmm_reboot_nb = {
static int cmm_memory_cb(struct notifier_block *self,
unsigned long action, void *arg)
{
- int ret = 0;
-
switch (action) {
case MEM_GOING_OFFLINE:
mutex_lock(&hotplug_mutex);
@@ -493,7 +488,7 @@ static int cmm_memory_cb(struct notifier_block *self,
break;
}
- return notifier_from_errno(ret);
+ return NOTIFY_OK;
}
static struct notifier_block cmm_mem_nb = {
@@ -502,19 +497,6 @@ static struct notifier_block cmm_mem_nb = {
};
#ifdef CONFIG_BALLOON_COMPACTION
-static struct vfsmount *balloon_mnt;
-
-static int cmm_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
- .name = "ppc-cmm",
- .init_fs_context = cmm_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
struct page *newpage, struct page *page,
enum migrate_mode mode)
@@ -566,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
return MIGRATEPAGE_SUCCESS;
}
-static int cmm_balloon_compaction_init(void)
+static void cmm_balloon_compaction_init(void)
{
- int rc;
-
balloon_devinfo_init(&b_dev_info);
b_dev_info.migratepage = cmm_migratepage;
-
- balloon_mnt = kern_mount(&balloon_fs);
- if (IS_ERR(balloon_mnt)) {
- rc = PTR_ERR(balloon_mnt);
- balloon_mnt = NULL;
- return rc;
- }
-
- b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
- if (IS_ERR(b_dev_info.inode)) {
- rc = PTR_ERR(b_dev_info.inode);
- b_dev_info.inode = NULL;
- kern_unmount(balloon_mnt);
- balloon_mnt = NULL;
- return rc;
- }
-
- b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
- return 0;
-}
-static void cmm_balloon_compaction_deinit(void)
-{
- if (b_dev_info.inode)
- iput(b_dev_info.inode);
- b_dev_info.inode = NULL;
- kern_unmount(balloon_mnt);
- balloon_mnt = NULL;
}
#else /* CONFIG_BALLOON_COMPACTION */
-static int cmm_balloon_compaction_init(void)
-{
- return 0;
-}
-
-static void cmm_balloon_compaction_deinit(void)
+static void cmm_balloon_compaction_init(void)
{
}
#endif /* CONFIG_BALLOON_COMPACTION */
@@ -624,9 +572,7 @@ static int cmm_init(void)
if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
return -EOPNOTSUPP;
- rc = cmm_balloon_compaction_init();
- if (rc)
- return rc;
+ cmm_balloon_compaction_init();
rc = register_oom_notifier(&cmm_oom_nb);
if (rc < 0)
@@ -660,7 +606,6 @@ out_reboot_notifier:
out_oom_notifier:
unregister_oom_notifier(&cmm_oom_nb);
out_balloon_compaction:
- cmm_balloon_compaction_deinit();
return rc;
}
@@ -679,7 +624,6 @@ static void cmm_exit(void)
unregister_memory_notifier(&cmm_mem_nb);
cmm_free_pages(atomic_long_read(&loaned_pages));
cmm_unregister_sysfs(&cmm_dev);
- cmm_balloon_compaction_deinit();
}
/**
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index b1f01ac0c29e..498d6efcb5ae 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -19,7 +19,6 @@
#include "of_helpers.h"
#include "pseries.h"
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
#include <asm/rtas.h>
@@ -389,7 +388,7 @@ static void pseries_hp_work_fn(struct work_struct *work)
handle_dlpar_errorlog(hp_work->errlog);
kfree(hp_work->errlog);
- kfree((void *)work);
+ kfree(work);
}
void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 352af5b14a0f..3f1cdccebc9c 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -18,6 +18,7 @@
#include <asm/plpar_wrappers.h>
#include <asm/machdep.h>
+#ifdef CONFIG_DTL
struct dtl {
struct dtl_entry *buf;
int cpu;
@@ -37,6 +38,15 @@ static u8 dtl_event_mask = DTL_LOG_ALL;
static int dtl_buf_entries = N_DISPATCH_LOG;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log. If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+static void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
struct dtl_ring {
u64 write_index;
struct dtl_entry *write_ptr;
@@ -355,3 +365,81 @@ static int dtl_init(void)
return 0;
}
machine_arch_initcall(pseries, dtl_init);
+#endif /* CONFIG_DTL */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
+ */
+static notrace u64 scan_dispatch_log(u64 stop_tb)
+{
+ u64 i = local_paca->dtl_ridx;
+ struct dtl_entry *dtl = local_paca->dtl_curr;
+ struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+ struct lppaca *vpa = local_paca->lppaca_ptr;
+ u64 tb_delta;
+ u64 stolen = 0;
+ u64 dtb;
+
+ if (!dtl)
+ return 0;
+
+ if (i == be64_to_cpu(vpa->dtl_idx))
+ return 0;
+ while (i < be64_to_cpu(vpa->dtl_idx)) {
+ dtb = be64_to_cpu(dtl->timebase);
+ tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
+ be32_to_cpu(dtl->ready_to_enqueue_time);
+ barrier();
+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+ /* buffer has overflowed */
+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+ dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ continue;
+ }
+ if (dtb > stop_tb)
+ break;
+#ifdef CONFIG_DTL
+ if (dtl_consumer)
+ dtl_consumer(dtl, i);
+#endif
+ stolen += tb_delta;
+ ++i;
+ ++dtl;
+ if (dtl == dtl_end)
+ dtl = local_paca->dispatch_log;
+ }
+ local_paca->dtl_ridx = i;
+ local_paca->dtl_curr = dtl;
+ return stolen;
+}
+
+/*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ */
+void notrace pseries_accumulate_stolen_time(void)
+{
+ u64 sst, ust;
+ struct cpu_accounting_data *acct = &local_paca->accounting;
+
+ sst = scan_dispatch_log(acct->starttime_user);
+ ust = scan_dispatch_log(acct->starttime);
+ acct->stime -= sst;
+ acct->utime -= ust;
+ acct->steal_time += ust + sst;
+}
+
+u64 pseries_calculate_stolen_time(u64 stop_tb)
+{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return 0;
+
+ if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
+ return scan_dispatch_log(stop_tb);
+
+ return 0;
+}
+
+#endif
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 09fafcf2d3a0..8e40ccac0f44 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -43,6 +43,8 @@ static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_pe;
+static void pseries_eeh_init_edev(struct pci_dn *pdn);
+
static void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
{
struct pci_dn *pdn = pci_get_pdn(pdev);
@@ -69,7 +71,7 @@ static void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
if (pdev->is_virtfn) {
/*
* FIXME: This really should be handled by choosing the right
- * parent PE in in pseries_eeh_init_edev().
+ * parent PE in pseries_eeh_init_edev().
*/
struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe;
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -359,7 +361,7 @@ static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
* This function takes care of the initialisation and inserts the eeh_dev
* into the correct eeh_pe. If no eeh_pe exists we'll allocate one.
*/
-void pseries_eeh_init_edev(struct pci_dn *pdn)
+static void pseries_eeh_init_edev(struct pci_dn *pdn)
{
struct eeh_pe pe, *parent;
struct eeh_dev *edev;
@@ -510,7 +512,7 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
int ret = 0;
/*
- * When we're enabling or disabling EEH functioality on
+ * When we're enabling or disabling EEH functionality on
* the particular PE, the PE config address is possibly
* unavailable. Therefore, we have to figure it out from
* the FDT node.
@@ -845,8 +847,7 @@ static int __init eeh_pseries_init(void)
return -EINVAL;
}
- /* Initialize error log lock and size */
- spin_lock_init(&slot_errbuf_lock);
+ /* Initialize error log size */
eeh_error_buf_size = rtas_token("rtas-error-log-max");
if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
pr_info("%s: unknown EEH error log size\n",
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index be661e919c76..623dfe0d8e1c 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -8,7 +8,7 @@
#include "pseries.h"
-void request_event_sources_irqs(struct device_node *np,
+void __init request_event_sources_irqs(struct device_node *np,
irq_handler_t handler,
const char *name)
{
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index f162156b7b68..080108d129ed 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -66,6 +66,8 @@ hypertas_fw_features_table[] = {
{FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"},
{FW_FEATURE_PAPR_SCM, "hcall-scm"},
{FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"},
+ {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"},
+ {FW_FEATURE_WATCHDOG, "hcall-watchdog"},
};
/* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 5ab44600c8d3..e0a7ac5db15d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -398,7 +398,7 @@ static int dlpar_online_cpu(struct device_node *dn)
if (get_hard_smp_processor_id(cpu) != thread)
continue;
cpu_maps_update_done();
- find_and_online_cpu_nid(cpu);
+ find_and_update_cpu_nid(cpu);
rc = device_online(get_cpu_device(cpu));
if (rc) {
dlpar_offline_cpu(dn);
@@ -619,17 +619,21 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn)
{
unsigned int use_count = 0;
- struct device_node *dn;
+ struct device_node *dn, *tn;
WARN_ON(!of_node_is_type(cachedn, "cache"));
for_each_of_cpu_node(dn) {
- if (of_find_next_cache_node(dn) == cachedn)
+ tn = of_find_next_cache_node(dn);
+ of_node_put(tn);
+ if (tn == cachedn)
use_count++;
}
for_each_node_by_type(dn, "cache") {
- if (of_find_next_cache_node(dn) == cachedn)
+ tn = of_find_next_cache_node(dn);
+ of_node_put(tn);
+ if (tn == cachedn)
use_count++;
}
@@ -649,10 +653,13 @@ static int pseries_cpuhp_detach_nodes(struct device_node *cpudn)
dn = cpudn;
while ((dn = of_find_next_cache_node(dn))) {
- if (pseries_cpuhp_cache_use_count(dn) > 1)
+ if (pseries_cpuhp_cache_use_count(dn) > 1) {
+ of_node_put(dn);
break;
+ }
ret = of_changeset_detach_node(&cs, dn);
+ of_node_put(dn);
if (ret)
goto out;
}
@@ -864,12 +871,13 @@ static int __init pseries_cpu_hotplug_init(void)
/* Processors can be added/removed only on LPAR */
if (firmware_has_feature(FW_FEATURE_LPAR)) {
for_each_node(node) {
- alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]);
+ if (!alloc_cpumask_var_node(&node_recorded_ids_map[node],
+ GFP_KERNEL, node))
+ return -ENOMEM;
/* Record ids of CPU added at boot time */
- cpumask_or(node_recorded_ids_map[node],
- node_recorded_ids_map[node],
- cpumask_of_node(node));
+ cpumask_copy(node_recorded_ids_map[node],
+ cpumask_of_node(node));
}
of_reconfig_notifier_register(&pseries_smp_nb);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 91cf23495ccb..2e3a317722a8 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -16,7 +16,6 @@
#include <asm/firmware.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/sparsemem.h>
#include <asm/fadump.h>
#include <asm/drmem.h>
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index ab9fc6506861..762eb15d3bd4 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -16,7 +16,7 @@
#ifdef CONFIG_TRACEPOINTS
#ifndef CONFIG_JUMP_LABEL
- .section ".toc","aw"
+ .data
.globl hcall_tracepoint_refcount
hcall_tracepoint_refcount:
@@ -88,7 +88,7 @@ hcall_tracepoint_refcount:
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
- ld r12,hcall_tracepoint_refcount@toc(r2); \
+ LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ; \
std r12,32(r1); \
cmpdi r12,0; \
bne- LABEL; \
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
index 96e18d3b2fcf..d48c9c7ce10f 100644
--- a/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -176,7 +176,7 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
= (unsigned int)last_p_partition_ID;
/* copy the Null-term char too */
- strlcpy(&next_partner_info->location_code[0],
+ strscpy(&next_partner_info->location_code[0],
(char *)&pi_buff[2],
sizeof(next_partner_info->location_code));
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 7ee3ed7d6cc2..a870cada7acd 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -152,7 +152,11 @@ static const struct dma_map_ops ibmebus_dma_ops = {
static int ibmebus_match_path(struct device *dev, const void *data)
{
struct device_node *dn = to_platform_device(dev)->dev.of_node;
- return (of_find_node_by_path(data) == dn);
+ struct device_node *tn = of_find_node_by_path(data);
+
+ of_node_put(tn);
+
+ return (tn == dn);
}
static int ibmebus_match_node(struct device *dev, const void *data)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 8f998e55735b..561adac69022 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -666,8 +666,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
- long *tce, enum dma_data_direction *direction,
- bool realmode)
+ long *tce, enum dma_data_direction *direction)
{
long rc;
unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@@ -701,6 +700,33 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
.get = tce_get_pSeriesLP
};
+/*
+ * Find nearest ibm,dma-window (default DMA window) or direct DMA window or
+ * dynamic 64bit DMA window, walking up the device tree.
+ */
+static struct device_node *pci_dma_find(struct device_node *dn,
+ const __be32 **dma_window)
+{
+ const __be32 *dw = NULL;
+
+ for ( ; dn && PCI_DN(dn); dn = dn->parent) {
+ dw = of_get_property(dn, "ibm,dma-window", NULL);
+ if (dw) {
+ if (dma_window)
+ *dma_window = dw;
+ return dn;
+ }
+ dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+ if (dw)
+ return dn;
+ dw = of_get_property(dn, DMA64_PROPNAME, NULL);
+ if (dw)
+ return dn;
+ }
+
+ return NULL;
+}
+
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
struct iommu_table *tbl;
@@ -713,20 +739,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
dn);
- /*
- * Find nearest ibm,dma-window (default DMA window), walking up the
- * device tree
- */
- for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window != NULL)
- break;
- }
+ pdn = pci_dma_find(dn, &dma_window);
- if (dma_window == NULL) {
+ if (dma_window == NULL)
pr_debug(" no ibm,dma-window property !\n");
- return;
- }
ppci = PCI_DN(pdn);
@@ -736,11 +752,13 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->table_group) {
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
tbl = ppci->table_group->tables[0];
- iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
- ppci->table_group, dma_window);
+ if (dma_window) {
+ iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
+ ppci->table_group, dma_window);
- if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
- panic("Failed to initialize iommu table");
+ if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+ panic("Failed to initialize iommu table");
+ }
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -1022,9 +1040,6 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
- ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
- BUID_LO(buid), ret);
switch (out_sz) {
case 5:
@@ -1042,6 +1057,11 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
break;
}
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n",
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret, query->largest_available_block,
+ query->page_size, query->windows_available);
+
return ret;
}
@@ -1233,7 +1253,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
bool default_win_removed = false, direct_mapping = false;
bool pmem_present;
struct pci_dn *pci = PCI_DN(pdn);
- struct iommu_table *tbl = pci->table_group->tables[0];
+ struct property *default_win = NULL;
dn = of_find_node_by_type(NULL, "ibm,pmemory");
pmem_present = dn != NULL;
@@ -1290,11 +1310,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* for extensions presence.
*/
if (query.windows_available == 0) {
- struct property *default_win;
int reset_win_ext;
/* DDW + IOMMU on single window may fail if there is any allocation */
- if (iommu_table_in_use(tbl)) {
+ if (iommu_table_in_use(pci->table_group->tables[0])) {
dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
goto out_failed;
}
@@ -1430,16 +1449,18 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
pci->table_group->tables[1] = newtbl;
- /* Keep default DMA window stuct if removed */
- if (default_win_removed) {
- tbl->it_size = 0;
- vfree(tbl->it_map);
- tbl->it_map = NULL;
- }
-
set_iommu_table_base(&dev->dev, newtbl);
}
+ if (default_win_removed) {
+ iommu_tce_table_put(pci->table_group->tables[0]);
+ pci->table_group->tables[0] = NULL;
+
+ /* default_win is valid here because default_win_removed == true */
+ of_remove_property(pdn, default_win);
+ dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn);
+ }
+
spin_lock(&dma_win_list_lock);
list_add(&window->list, &dma_win_list);
spin_unlock(&dma_win_list_lock);
@@ -1504,13 +1525,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
pr_debug(" node is %pOF\n", dn);
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
- pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window)
- break;
- }
-
+ pdn = pci_dma_find(dn, &dma_window);
if (!pdn || !PCI_DN(pdn)) {
printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
"no DMA window found for pci dev=%s dn=%pOF\n",
@@ -1541,7 +1556,6 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
- const __be32 *dma_window = NULL;
/* only attempt to use a new window if 64-bit DMA is requested */
if (dma_mask < DMA_BIT_MASK(64))
@@ -1555,13 +1569,7 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
* search upwards in the tree until we either hit a dma-window
* property, OR find a parent with a table already allocated.
*/
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
- pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window)
- break;
- }
-
+ pdn = pci_dma_find(dn, NULL);
if (pdn && PCI_DN(pdn))
return enable_ddw(pdev, pdn);
@@ -1654,7 +1662,7 @@ static struct notifier_block iommu_reconfig_nb = {
};
/* These are called very early. */
-void iommu_init_early_pSeries(void)
+void __init iommu_init_early_pSeries(void)
{
if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
return;
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 145fcfbc017f..096d09ed89f6 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -6,7 +6,7 @@
#include <linux/kernel.h>
#include <linux/interrupt.h>
-#include <asm/machdep.h>
+#include <asm/setup.h>
#include <asm/page.h>
#include <asm/firmware.h>
#include <asm/kexec.h>
@@ -61,3 +61,11 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
} else
xics_kexec_teardown_cpu(secondary);
}
+
+void pseries_machine_kexec(struct kimage *image)
+{
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+
+ default_machine_kexec(image);
+}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3df6bdfea475..97ef6499e501 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -27,11 +27,10 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
-#include <asm/machdep.h>
+#include <asm/setup.h>
#include <asm/mmu_context.h>
#include <asm/iommu.h>
#include <asm/tlb.h>
-#include <asm/prom.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/smp.h>
@@ -40,7 +39,6 @@
#include <asm/plpar_wrappers.h>
#include <asm/kexec.h>
#include <asm/fadump.h>
-#include <asm/asm-prototypes.h>
#include <asm/dtl.h>
#include "pseries.h"
@@ -58,6 +56,7 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* H_BLOCK_REMOVE supported block size for this page size in segment who's base
* page size is that page size.
@@ -66,6 +65,7 @@ EXPORT_SYMBOL(plpar_hcall_norets);
* page size.
*/
static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+#endif
/*
* Due to the involved complexity, and that the current hypervisor is only
@@ -660,6 +660,17 @@ static int __init vcpudispatch_stats_procfs_init(void)
}
machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+u64 pseries_paravirt_steal_clock(int cpu)
+{
+ struct lppaca *lppaca = &lppaca_of(cpu);
+
+ return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+ be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+}
+#endif
+
#endif /* CONFIG_PPC_SPLPAR */
void vpa_init(int cpu)
@@ -689,7 +700,7 @@ void vpa_init(int cpu)
return;
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
@@ -702,7 +713,7 @@ void vpa_init(int cpu)
"cpu %d (hw %d) of area %lx failed with %ld\n",
cpu, hwcpu, addr, ret);
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
/*
* Register dispatch trace log, if one has been allocated.
@@ -712,6 +723,36 @@ void vpa_init(int cpu)
#ifdef CONFIG_PPC_BOOK3S_64
+static int __init pseries_lpar_register_process_table(unsigned long base,
+ unsigned long page_size, unsigned long table_size)
+{
+ long rc;
+ unsigned long flags = 0;
+
+ if (table_size)
+ flags |= PROC_TABLE_NEW;
+ if (radix_enabled()) {
+ flags |= PROC_TABLE_RADIX;
+ if (mmu_has_feature(MMU_FTR_GTSE))
+ flags |= PROC_TABLE_GTSE;
+ } else
+ flags |= PROC_TABLE_HPT_SLB;
+ for (;;) {
+ rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+ page_size, table_size);
+ if (!H_IS_LONG_BUSY(rc))
+ break;
+ mdelay(get_longbusy_msecs(rc));
+ }
+ if (rc != H_SUCCESS) {
+ pr_err("Failed to register process table (rc=%ld)\n", rc);
+ BUG();
+ }
+ return rc;
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+
static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
unsigned long vpn, unsigned long pa,
unsigned long rflags, unsigned long vflags,
@@ -1680,34 +1721,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
return 0;
}
-static int pseries_lpar_register_process_table(unsigned long base,
- unsigned long page_size, unsigned long table_size)
-{
- long rc;
- unsigned long flags = 0;
-
- if (table_size)
- flags |= PROC_TABLE_NEW;
- if (radix_enabled()) {
- flags |= PROC_TABLE_RADIX;
- if (mmu_has_feature(MMU_FTR_GTSE))
- flags |= PROC_TABLE_GTSE;
- } else
- flags |= PROC_TABLE_HPT_SLB;
- for (;;) {
- rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
- page_size, table_size);
- if (!H_IS_LONG_BUSY(rc))
- break;
- mdelay(get_longbusy_msecs(rc));
- }
- if (rc != H_SUCCESS) {
- pr_err("Failed to register process table (rc=%ld)\n", rc);
- BUG();
- }
- return rc;
-}
-
void __init hpte_init_pseries(void)
{
mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
@@ -1730,9 +1743,10 @@ void __init hpte_init_pseries(void)
if (cpu_has_feature(CPU_FTR_ARCH_300))
pseries_lpar_register_process_table(0, 0, 0);
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
#ifdef CONFIG_PPC_RADIX_MMU
-void radix_init_pseries(void)
+void __init radix_init_pseries(void)
{
pr_info("Using radix MMU under hypervisor\n");
@@ -1932,7 +1946,8 @@ int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
return rc;
}
-static unsigned long vsid_unscramble(unsigned long vsid, int ssize)
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize)
{
unsigned long protovsid;
unsigned long va_bits = VA_BITS;
@@ -1992,6 +2007,7 @@ static int __init reserve_vrma_context_id(void)
return 0;
}
machine_device_initcall(pseries, reserve_vrma_context_id);
+#endif
#ifdef CONFIG_DEBUG_FS
/* debugfs file interface for vpa data */
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index f71eac74ea92..63fd925ccbb8 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -28,7 +28,6 @@
#include <asm/firmware.h>
#include <asm/rtas.h>
#include <asm/time.h>
-#include <asm/prom.h>
#include <asm/vdso_datapage.h>
#include <asm/vio.h>
#include <asm/mmu.h>
@@ -36,6 +35,7 @@
#include <asm/drmem.h>
#include "pseries.h"
+#include "vas.h" /* pseries_vas_dlpar_cpu() */
/*
* This isn't a module but we expose that to userspace
@@ -311,6 +311,92 @@ static void parse_mpp_x_data(struct seq_file *m)
seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
}
+/*
+ * PAPR defines, in section "7.3.16 System Parameters Option", the token 55 to
+ * read the LPAR name, and the largest output data to 4000 + 2 bytes length.
+ */
+#define SPLPAR_LPAR_NAME_TOKEN 55
+#define GET_SYS_PARM_BUF_SIZE 4002
+#if GET_SYS_PARM_BUF_SIZE > RTAS_DATA_BUF_SIZE
+#error "GET_SYS_PARM_BUF_SIZE is larger than RTAS_DATA_BUF_SIZE"
+#endif
+
+/*
+ * Read the lpar name using the RTAS ibm,get-system-parameter call.
+ *
+ * The name read through this call is updated if changes are made by the end
+ * user on the hypervisor side.
+ *
+ * Some hypervisor (like Qemu) may not provide this value. In that case, a non
+ * null value is returned.
+ */
+static int read_rtas_lpar_name(struct seq_file *m)
+{
+ int rc, len, token;
+ union {
+ char raw_buffer[GET_SYS_PARM_BUF_SIZE];
+ struct {
+ __be16 len;
+ char name[GET_SYS_PARM_BUF_SIZE-2];
+ };
+ } *local_buffer;
+
+ token = rtas_token("ibm,get-system-parameter");
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ local_buffer = kmalloc(sizeof(*local_buffer), GFP_KERNEL);
+ if (!local_buffer)
+ return -ENOMEM;
+
+ do {
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, sizeof(*local_buffer));
+ rc = rtas_call(token, 3, 1, NULL, SPLPAR_LPAR_NAME_TOKEN,
+ __pa(rtas_data_buf), sizeof(*local_buffer));
+ if (!rc)
+ memcpy(local_buffer->raw_buffer, rtas_data_buf,
+ sizeof(local_buffer->raw_buffer));
+ spin_unlock(&rtas_data_buf_lock);
+ } while (rtas_busy_delay(rc));
+
+ if (!rc) {
+ /* Force end of string */
+ len = min((int) be16_to_cpu(local_buffer->len),
+ (int) sizeof(local_buffer->name)-1);
+ local_buffer->name[len] = '\0';
+
+ seq_printf(m, "partition_name=%s\n", local_buffer->name);
+ } else
+ rc = -ENODATA;
+
+ kfree(local_buffer);
+ return rc;
+}
+
+/*
+ * Read the LPAR name from the Device Tree.
+ *
+ * The value read in the DT is not updated if the end-user is touching the LPAR
+ * name on the hypervisor side.
+ */
+static int read_dt_lpar_name(struct seq_file *m)
+{
+ const char *name;
+
+ if (of_property_read_string(of_root, "ibm,partition-name", &name))
+ return -ENOENT;
+
+ seq_printf(m, "partition_name=%s\n", name);
+ return 0;
+}
+
+static void read_lpar_name(struct seq_file *m)
+{
+ if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
+ pr_err_once("Error can't get the LPAR name");
+}
+
#define SPLPAR_CHARACTERISTICS_TOKEN 20
#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
@@ -496,6 +582,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
/* this call handles the ibm,get-system-parameter contents */
+ read_lpar_name(m);
parse_system_parameter_string(m);
parse_ppp_data(m);
parse_mpp_data(m);
@@ -531,8 +618,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "shared_processor_mode=%d\n",
lppaca_shared_proc(get_lppaca()));
-#ifdef CONFIG_PPC_BOOK3S_64
- seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (!radix_enabled())
+ seq_printf(m, "slb_size=%d\n", mmu_slb_size);
#endif
parse_em_data(m);
maxmem_data(m);
@@ -661,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL);
+
+ if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+ /*
+ * The hypervisor assigns VAS resources based
+ * on entitled capacity for shared mode.
+ * Reconfig VAS windows based on DLPAR CPU events.
+ */
+ if (pseries_vas_dlpar_cpu() != 0)
+ retval = H_HARDWARE;
+ }
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 210a37a065fb..634fac5db3f9 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -26,6 +26,7 @@
#include <asm/machdep.h>
#include <asm/rtas.h>
#include "pseries.h"
+#include "vas.h" /* vas_migration_handler() */
#include "../../kernel/cacheinfo.h"
static struct kobject *mobility_kobj;
@@ -47,6 +48,39 @@ struct update_props_workarea {
#define MIGRATION_SCOPE (1)
#define PRRN_SCOPE -2
+#ifdef CONFIG_PPC_WATCHDOG
+static unsigned int nmi_wd_lpm_factor = 200;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = {
+ {
+ .procname = "nmi_wd_lpm_factor",
+ .data = &nmi_wd_lpm_factor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ },
+ {}
+};
+static struct ctl_table nmi_wd_lpm_factor_sysctl_root[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = nmi_wd_lpm_factor_ctl_table,
+ },
+ {}
+};
+
+static int __init register_nmi_wd_lpm_factor_sysctl(void)
+{
+ register_sysctl_table(nmi_wd_lpm_factor_sysctl_root);
+
+ return 0;
+}
+device_initcall(register_nmi_wd_lpm_factor_sysctl);
+#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_PPC_WATCHDOG */
+
static int mobility_rtas_call(int token, char *buf, s32 scope)
{
int rc;
@@ -182,7 +216,7 @@ static int update_dt_node(struct device_node *dn, s32 scope)
nprops = be32_to_cpu(upwa->nprops);
/* On the first call to ibm,update-properties for a node the
- * the first property value descriptor contains an empty
+ * first property value descriptor contains an empty
* property name, the property value length encoded as u32,
* and the property value is the node path being updated.
*/
@@ -265,7 +299,7 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
return rc;
}
-int pseries_devicetree_update(s32 scope)
+static int pseries_devicetree_update(s32 scope)
{
char *rtas_buf;
__be32 *data;
@@ -426,6 +460,43 @@ static int wait_for_vasi_session_suspending(u64 handle)
return ret;
}
+static void wait_for_vasi_session_completed(u64 handle)
+{
+ unsigned long state = 0;
+ int ret;
+
+ pr_info("waiting for memory transfer to complete...\n");
+
+ /*
+ * Wait for transition from H_VASI_RESUMED to H_VASI_COMPLETED.
+ */
+ while (true) {
+ ret = poll_vasi_state(handle, &state);
+
+ /*
+ * If the memory transfer is already complete and the migration
+ * has been cleaned up by the hypervisor, H_PARAMETER is return,
+ * which is translate in EINVAL by poll_vasi_state().
+ */
+ if (ret == -EINVAL || (!ret && state == H_VASI_COMPLETED)) {
+ pr_info("memory transfer completed.\n");
+ break;
+ }
+
+ if (ret) {
+ pr_err("H_VASI_STATE return error (%d)\n", ret);
+ break;
+ }
+
+ if (state != H_VASI_RESUMED) {
+ pr_err("unexpected H_VASI_STATE result %lu\n", state);
+ break;
+ }
+
+ msleep(500);
+ }
+}
+
static void prod_single(unsigned int target_cpu)
{
long hvrc;
@@ -451,11 +522,15 @@ static void prod_others(void)
static u16 clamp_slb_size(void)
{
+#ifdef CONFIG_PPC_64S_HASH_MMU
u16 prev = mmu_slb_size;
slb_set_size(SLB_MIN_SIZE);
return prev;
+#else
+ return 0;
+#endif
}
static int do_suspend(void)
@@ -660,17 +735,47 @@ static int pseries_suspend(u64 handle)
static int pseries_migrate_partition(u64 handle)
{
int ret;
+ unsigned int factor = 0;
+
+#ifdef CONFIG_PPC_WATCHDOG
+ factor = nmi_wd_lpm_factor;
+#endif
+ /*
+ * When the migration is initiated, the hypervisor changes VAS
+ * mappings to prepare before OS gets the notification and
+ * closes all VAS windows. NX generates continuous faults during
+ * this time and the user space can not differentiate these
+ * faults from the migration event. So reduce this time window
+ * by closing VAS windows at the beginning of this function.
+ */
+ vas_migration_handler(VAS_SUSPEND);
ret = wait_for_vasi_session_suspending(handle);
if (ret)
- return ret;
+ goto out;
+
+ if (factor)
+ watchdog_nmi_set_timeout_pct(factor);
ret = pseries_suspend(handle);
- if (ret == 0)
+ if (ret == 0) {
post_mobility_fixup();
- else
+ /*
+ * Wait until the memory transfer is complete, so that the user
+ * space process returns from the syscall after the transfer is
+ * complete. This allows the user hooks to be executed at the
+ * right time.
+ */
+ wait_for_vasi_session_completed(handle);
+ } else
pseries_cancel_migration(handle, ret);
+ if (factor)
+ watchdog_nmi_set_timeout_pct(0);
+
+out:
+ vas_migration_handler(VAS_RESUME);
+
return ret;
}
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 8627362f613e..a3a71d37cb9a 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -7,6 +7,7 @@
#include <linux/crash_dump.h>
#include <linux/device.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/msi.h>
#include <asm/rtas.h>
@@ -321,27 +322,6 @@ out:
return request;
}
-static int check_msix_entries(struct pci_dev *pdev)
-{
- struct msi_desc *entry;
- int expected;
-
- /* There's no way for us to express to firmware that we want
- * a discontiguous, or non-zero based, range of MSI-X entries.
- * So we must reject such requests. */
-
- expected = 0;
- for_each_pci_msi_entry(entry, pdev) {
- if (entry->msi_attrib.entry_nr != expected) {
- pr_debug("rtas_msi: bad MSI-X entries.\n");
- return -EINVAL;
- }
- expected++;
- }
-
- return 0;
-}
-
static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
{
u32 addr_hi, addr_lo;
@@ -380,9 +360,6 @@ static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
if (quota && quota < nvec)
return quota;
- if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev))
- return -EINVAL;
-
/*
* Firmware currently refuse any non power of two allocation
* so we round up if the quota will allow it.
@@ -448,8 +425,7 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev
int nvec, msi_alloc_info_t *arg)
{
struct pci_dev *pdev = to_pci_dev(dev);
- struct msi_desc *desc = first_pci_msi_entry(pdev);
- int type = desc->msi_attrib.is_msix ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
+ int type = pdev->msix_enabled ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
}
@@ -530,9 +506,16 @@ static struct irq_chip pseries_pci_msi_irq_chip = {
.irq_write_msi_msg = pseries_msi_write_msg,
};
+
+/*
+ * Set MSI_FLAG_MSIX_CONTIGUOUS as there is no way to express to
+ * firmware to request a discontiguous or non-zero based range of
+ * MSI-X entries. Core code will reject such setup attempts.
+ */
static struct msi_domain_info pseries_msi_domain_info = {
.flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX |
+ MSI_FLAG_MSIX_CONTIGUOUS),
.ops = &pseries_pci_msi_domain_ops,
.chip = &pseries_pci_msi_irq_chip,
};
@@ -580,7 +563,7 @@ static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
int hwirq;
int i, ret;
- hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_attrib.entry_nr);
+ hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index);
if (hwirq < 0) {
dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
return hwirq;
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 69db2eca367f..cbf1720eb4aa 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -13,9 +13,9 @@
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/uaccess.h>
+#include <linux/of.h>
#include <asm/nvram.h>
#include <asm/rtas.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
/* Max bytes to read/write in one go */
diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
new file mode 100644
index 000000000000..526c621b098b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Platform energy and frequency attributes driver
+ *
+ * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a
+ * directory structure containing files in keyword - value pairs that specify
+ * energy and frequency configuration of the system.
+ *
+ * The format of exposing the sysfs information is as follows:
+ * /sys/firmware/papr/energy_scale_info/
+ * |-- <id>/
+ * |-- desc
+ * |-- value
+ * |-- value_desc (if exists)
+ * |-- <id>/
+ * |-- desc
+ * |-- value
+ * |-- value_desc (if exists)
+ *
+ * Copyright 2022 IBM Corp.
+ */
+
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "pseries.h"
+
+/*
+ * Flag attributes to fetch either all or one attribute from the HCALL
+ * flag = BE(0) => fetch all attributes with firstAttributeId = 0
+ * flag = BE(1) => fetch a single attribute with firstAttributeId = id
+ */
+#define ESI_FLAGS_ALL 0
+#define ESI_FLAGS_SINGLE (1ull << 63)
+
+#define KOBJ_MAX_ATTRS 3
+
+#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr)
+#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute)
+#define CURR_MAX_ESI_ATTRS 8
+
+struct energy_scale_attribute {
+ __be64 id;
+ __be64 val;
+ u8 desc[64];
+ u8 value_desc[64];
+} __packed;
+
+struct h_energy_scale_info_hdr {
+ __be64 num_attrs;
+ __be64 array_offset;
+ u8 data_header_version;
+} __packed;
+
+struct papr_attr {
+ u64 id;
+ struct kobj_attribute kobj_attr;
+};
+
+struct papr_group {
+ struct attribute_group pg;
+ struct papr_attr pgattrs[KOBJ_MAX_ATTRS];
+};
+
+static struct papr_group *papr_groups;
+/* /sys/firmware/papr */
+static struct kobject *papr_kobj;
+/* /sys/firmware/papr/energy_scale_info */
+static struct kobject *esi_kobj;
+
+/*
+ * Energy modes can change dynamically hence making a new hcall each time the
+ * information needs to be retrieved
+ */
+static int papr_get_attr(u64 id, struct energy_scale_attribute *esi)
+{
+ int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+ int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+ struct energy_scale_attribute *curr_esi;
+ struct h_energy_scale_info_hdr *hdr;
+ char *buf;
+
+ buf = kmalloc(esi_buf_size, GFP_KERNEL);
+ if (buf == NULL)
+ return -ENOMEM;
+
+retry:
+ ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE,
+ id, virt_to_phys(buf),
+ esi_buf_size);
+
+ /*
+ * If the hcall fails with not enough memory for either the
+ * header or data, attempt to allocate more
+ */
+ if (ret == H_PARTIAL || ret == H_P4) {
+ char *temp_buf;
+
+ max_esi_attrs += 4;
+ esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs);
+
+ temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL);
+ if (temp_buf)
+ buf = temp_buf;
+ else
+ return -ENOMEM;
+
+ goto retry;
+ }
+
+ if (ret != H_SUCCESS) {
+ pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO");
+ ret = -EIO;
+ goto out_buf;
+ }
+
+ hdr = (struct h_energy_scale_info_hdr *) buf;
+ curr_esi = (struct energy_scale_attribute *)
+ (buf + be64_to_cpu(hdr->array_offset));
+
+ if (esi_buf_size <
+ be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs)
+ * sizeof(struct energy_scale_attribute))) {
+ ret = -EIO;
+ goto out_buf;
+ }
+
+ *esi = *curr_esi;
+
+out_buf:
+ kfree(buf);
+
+ return ret;
+}
+
+/*
+ * Extract and export the description of the energy scale attributes
+ */
+static ssize_t desc_show(struct kobject *kobj,
+ struct kobj_attribute *kobj_attr,
+ char *buf)
+{
+ struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr,
+ kobj_attr);
+ struct energy_scale_attribute esi;
+ int ret;
+
+ ret = papr_get_attr(pattr->id, &esi);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%s\n", esi.desc);
+}
+
+/*
+ * Extract and export the numeric value of the energy scale attributes
+ */
+static ssize_t val_show(struct kobject *kobj,
+ struct kobj_attribute *kobj_attr,
+ char *buf)
+{
+ struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr,
+ kobj_attr);
+ struct energy_scale_attribute esi;
+ int ret;
+
+ ret = papr_get_attr(pattr->id, &esi);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%llu\n", be64_to_cpu(esi.val));
+}
+
+/*
+ * Extract and export the value description in string format of the energy
+ * scale attributes
+ */
+static ssize_t val_desc_show(struct kobject *kobj,
+ struct kobj_attribute *kobj_attr,
+ char *buf)
+{
+ struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr,
+ kobj_attr);
+ struct energy_scale_attribute esi;
+ int ret;
+
+ ret = papr_get_attr(pattr->id, &esi);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%s\n", esi.value_desc);
+}
+
+static struct papr_ops_info {
+ const char *attr_name;
+ ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr,
+ char *buf);
+} ops_info[KOBJ_MAX_ATTRS] = {
+ { "desc", desc_show },
+ { "value", val_show },
+ { "value_desc", val_desc_show },
+};
+
+static void add_attr(u64 id, int index, struct papr_attr *attr)
+{
+ attr->id = id;
+ sysfs_attr_init(&attr->kobj_attr.attr);
+ attr->kobj_attr.attr.name = ops_info[index].attr_name;
+ attr->kobj_attr.attr.mode = 0444;
+ attr->kobj_attr.show = ops_info[index].show;
+}
+
+static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc)
+{
+ int i;
+
+ for (i = 0; i < KOBJ_MAX_ATTRS; i++) {
+ if (!strcmp(ops_info[i].attr_name, "value_desc") &&
+ !show_val_desc) {
+ continue;
+ }
+ add_attr(id, i, &pg->pgattrs[i]);
+ pg->pg.attrs[i] = &pg->pgattrs[i].kobj_attr.attr;
+ }
+
+ return sysfs_create_group(esi_kobj, &pg->pg);
+}
+
+
+static int __init papr_init(void)
+{
+ int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+ int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+ struct h_energy_scale_info_hdr *esi_hdr;
+ struct energy_scale_attribute *esi_attrs;
+ uint64_t num_attrs;
+ char *esi_buf;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR) ||
+ !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) {
+ return -ENXIO;
+ }
+
+ esi_buf = kmalloc(esi_buf_size, GFP_KERNEL);
+ if (esi_buf == NULL)
+ return -ENOMEM;
+ /*
+ * hcall(
+ * uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info
+ * uint64 flags, // Per the flag request
+ * uint64 firstAttributeId, // The attribute id
+ * uint64 bufferAddress, // Guest physical address of the output buffer
+ * uint64 bufferSize); // The size in bytes of the output buffer
+ */
+retry:
+
+ ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0,
+ virt_to_phys(esi_buf), esi_buf_size);
+
+ /*
+ * If the hcall fails with not enough memory for either the
+ * header or data, attempt to allocate more
+ */
+ if (ret == H_PARTIAL || ret == H_P4) {
+ char *temp_esi_buf;
+
+ max_esi_attrs += 4;
+ esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs);
+
+ temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL);
+ if (temp_esi_buf)
+ esi_buf = temp_esi_buf;
+ else
+ return -ENOMEM;
+
+ goto retry;
+ }
+
+ if (ret != H_SUCCESS) {
+ pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret);
+ goto out_free_esi_buf;
+ }
+
+ esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf;
+ num_attrs = be64_to_cpu(esi_hdr->num_attrs);
+ esi_attrs = (struct energy_scale_attribute *)
+ (esi_buf + be64_to_cpu(esi_hdr->array_offset));
+
+ if (esi_buf_size <
+ be64_to_cpu(esi_hdr->array_offset) +
+ (num_attrs * sizeof(struct energy_scale_attribute))) {
+ goto out_free_esi_buf;
+ }
+
+ papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL);
+ if (!papr_groups)
+ goto out_free_esi_buf;
+
+ papr_kobj = kobject_create_and_add("papr", firmware_kobj);
+ if (!papr_kobj) {
+ pr_warn("kobject_create_and_add papr failed\n");
+ goto out_papr_groups;
+ }
+
+ esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj);
+ if (!esi_kobj) {
+ pr_warn("kobject_create_and_add energy_scale_info failed\n");
+ goto out_kobj;
+ }
+
+ /* Allocate the groups before registering */
+ for (idx = 0; idx < num_attrs; idx++) {
+ papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1,
+ sizeof(*papr_groups[idx].pg.attrs),
+ GFP_KERNEL);
+ if (!papr_groups[idx].pg.attrs)
+ goto out_pgattrs;
+
+ papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld",
+ be64_to_cpu(esi_attrs[idx].id));
+ if (papr_groups[idx].pg.name == NULL)
+ goto out_pgattrs;
+ }
+
+ for (idx = 0; idx < num_attrs; idx++) {
+ bool show_val_desc = true;
+
+ /* Do not add the value desc attr if it does not exist */
+ if (strnlen(esi_attrs[idx].value_desc,
+ sizeof(esi_attrs[idx].value_desc)) == 0)
+ show_val_desc = false;
+
+ if (add_attr_group(be64_to_cpu(esi_attrs[idx].id),
+ &papr_groups[idx],
+ show_val_desc)) {
+ pr_warn("Failed to create papr attribute group %s\n",
+ papr_groups[idx].pg.name);
+ idx = num_attrs;
+ goto out_pgattrs;
+ }
+ }
+
+ kfree(esi_buf);
+ return 0;
+out_pgattrs:
+ for (i = 0; i < idx ; i++) {
+ kfree(papr_groups[i].pg.attrs);
+ kfree(papr_groups[i].pg.name);
+ }
+ kobject_put(esi_kobj);
+out_kobj:
+ kobject_put(papr_kobj);
+out_papr_groups:
+ kfree(papr_groups);
+out_free_esi_buf:
+ kfree(esi_buf);
+
+ return -ENOMEM;
+}
+
+machine_device_initcall(pseries, papr_init);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f48e87ac89c9..2f8385523a13 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -19,6 +19,7 @@
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
+#include <linux/perf_event.h>
#define BIND_ANY_ADDR (~0ul)
@@ -28,7 +29,7 @@
(1ul << ND_CMD_SET_CONFIG_DATA) | \
(1ul << ND_CMD_CALL))
-/* DIMM health bitmap bitmap indicators */
+/* DIMM health bitmap indicators */
/* SCM device is unable to persist memory contents */
#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
/* SCM device failed to persist memory contents */
@@ -120,6 +121,9 @@ struct papr_scm_priv {
/* length of the stat buffer as expected by phyp */
size_t stat_buffer_len;
+
+ /* The bits which needs to be overridden */
+ u64 health_bitmap_inject_mask;
};
static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -340,6 +344,191 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
return 0;
}
+#ifdef CONFIG_PERF_EVENTS
+#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu)
+
+static const char * const nvdimm_events_map[] = {
+ [1] = "CtlResCt",
+ [2] = "CtlResTm",
+ [3] = "PonSecs ",
+ [4] = "MemLife ",
+ [5] = "CritRscU",
+ [6] = "HostLCnt",
+ [7] = "HostSCnt",
+ [8] = "HostSDur",
+ [9] = "HostLDur",
+ [10] = "MedRCnt ",
+ [11] = "MedWCnt ",
+ [12] = "MedRDur ",
+ [13] = "MedWDur ",
+ [14] = "CchRHCnt",
+ [15] = "CchWHCnt",
+ [16] = "FastWCnt",
+};
+
+static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
+{
+ struct papr_scm_perf_stat *stat;
+ struct papr_scm_perf_stats *stats;
+ struct papr_scm_priv *p = dev_get_drvdata(dev);
+ int rc, size;
+
+ /* Invalid eventcode */
+ if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
+ return -EINVAL;
+
+ /* Allocate request buffer enough to hold single performance stat */
+ size = sizeof(struct papr_scm_perf_stats) +
+ sizeof(struct papr_scm_perf_stat);
+
+ if (!p)
+ return -EINVAL;
+
+ stats = kzalloc(size, GFP_KERNEL);
+ if (!stats)
+ return -ENOMEM;
+
+ stat = &stats->scm_statistic[0];
+ memcpy(&stat->stat_id,
+ nvdimm_events_map[event->attr.config],
+ sizeof(stat->stat_id));
+ stat->stat_val = 0;
+
+ rc = drc_pmem_query_stats(p, stats, 1);
+ if (rc < 0) {
+ kfree(stats);
+ return rc;
+ }
+
+ *count = be64_to_cpu(stat->stat_val);
+ kfree(stats);
+ return 0;
+}
+
+static int papr_scm_pmu_event_init(struct perf_event *event)
+{
+ struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+ struct papr_scm_priv *p;
+
+ if (!nd_pmu)
+ return -EINVAL;
+
+ /* test the event attr type for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* it does not support event sampling mode */
+ if (is_sampling_event(event))
+ return -EOPNOTSUPP;
+
+ /* no branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ p = (struct papr_scm_priv *)nd_pmu->dev->driver_data;
+ if (!p)
+ return -EINVAL;
+
+ /* Invalid eventcode */
+ if (event->attr.config == 0 || event->attr.config > 16)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int papr_scm_pmu_add(struct perf_event *event, int flags)
+{
+ u64 count;
+ int rc;
+ struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+
+ if (!nd_pmu)
+ return -EINVAL;
+
+ if (flags & PERF_EF_START) {
+ rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count);
+ if (rc)
+ return rc;
+
+ local64_set(&event->hw.prev_count, count);
+ }
+
+ return 0;
+}
+
+static void papr_scm_pmu_read(struct perf_event *event)
+{
+ u64 prev, now;
+ int rc;
+ struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+
+ if (!nd_pmu)
+ return;
+
+ rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &now);
+ if (rc)
+ return;
+
+ prev = local64_xchg(&event->hw.prev_count, now);
+ local64_add(now - prev, &event->count);
+}
+
+static void papr_scm_pmu_del(struct perf_event *event, int flags)
+{
+ papr_scm_pmu_read(event);
+}
+
+static void papr_scm_pmu_register(struct papr_scm_priv *p)
+{
+ struct nvdimm_pmu *nd_pmu;
+ int rc, nodeid;
+
+ nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
+ if (!nd_pmu) {
+ rc = -ENOMEM;
+ goto pmu_err_print;
+ }
+
+ if (!p->stat_buffer_len) {
+ rc = -ENOENT;
+ goto pmu_check_events_err;
+ }
+
+ nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
+ nd_pmu->pmu.name = nvdimm_name(p->nvdimm);
+ nd_pmu->pmu.event_init = papr_scm_pmu_event_init;
+ nd_pmu->pmu.read = papr_scm_pmu_read;
+ nd_pmu->pmu.add = papr_scm_pmu_add;
+ nd_pmu->pmu.del = papr_scm_pmu_del;
+
+ nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+ PERF_PMU_CAP_NO_EXCLUDE;
+
+ /*updating the cpumask variable */
+ nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
+ nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);
+
+ rc = register_nvdimm_pmu(nd_pmu, p->pdev);
+ if (rc)
+ goto pmu_check_events_err;
+
+ /*
+ * Set archdata.priv value to nvdimm_pmu structure, to handle the
+ * unregistering of pmu device.
+ */
+ p->pdev->archdata.priv = nd_pmu;
+ return;
+
+pmu_check_events_err:
+ kfree(nd_pmu);
+pmu_err_print:
+ dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
+}
+
+#else
+static void papr_scm_pmu_register(struct papr_scm_priv *p) { }
+#endif
+
/*
* Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
* health information.
@@ -347,19 +536,29 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
static int __drc_pmem_query_health(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ u64 bitmap = 0;
long rc;
/* issue the hcall */
rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
- if (rc != H_SUCCESS) {
+ if (rc == H_SUCCESS)
+ bitmap = ret[0] & ret[1];
+ else if (rc == H_FUNCTION)
+ dev_info_once(&p->pdev->dev,
+ "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
+ else {
+
dev_err(&p->pdev->dev,
"Failed to query health information, Err:%ld\n", rc);
return -ENXIO;
}
p->lasthealth_jiffies = jiffies;
- p->health_bitmap = ret[0] & ret[1];
-
+ /* Allow injecting specific health bits via inject mask. */
+ if (p->health_bitmap_inject_mask)
+ bitmap = (bitmap & ~p->health_bitmap_inject_mask) |
+ p->health_bitmap_inject_mask;
+ WRITE_ONCE(p->health_bitmap, bitmap);
dev_dbg(&p->pdev->dev,
"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
ret[0], ret[1]);
@@ -669,6 +868,56 @@ out:
return rc;
}
+/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */
+static int papr_pdsm_smart_inject(struct papr_scm_priv *p,
+ union nd_pdsm_payload *payload)
+{
+ int rc;
+ u32 supported_flags = 0;
+ u64 inject_mask = 0, clear_mask = 0;
+ u64 mask;
+
+ /* Check for individual smart error flags and update inject/clear masks */
+ if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+ supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+ if (payload->smart_inject.fatal_enable)
+ inject_mask |= PAPR_PMEM_HEALTH_FATAL;
+ else
+ clear_mask |= PAPR_PMEM_HEALTH_FATAL;
+ }
+
+ if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+ supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+ if (payload->smart_inject.unsafe_shutdown_enable)
+ inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+ else
+ clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+ }
+
+ dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n",
+ inject_mask, clear_mask);
+
+ /* Prevent concurrent access to dimm health bitmap related members */
+ rc = mutex_lock_interruptible(&p->health_mutex);
+ if (rc)
+ return rc;
+
+ /* Use inject/clear masks to set health_bitmap_inject_mask */
+ mask = READ_ONCE(p->health_bitmap_inject_mask);
+ mask = (mask & ~clear_mask) | inject_mask;
+ WRITE_ONCE(p->health_bitmap_inject_mask, mask);
+
+ /* Invalidate cached health bitmap */
+ p->lasthealth_jiffies = 0;
+
+ mutex_unlock(&p->health_mutex);
+
+ /* Return the supported flags back to userspace */
+ payload->smart_inject.flags = supported_flags;
+
+ return sizeof(struct nd_papr_pdsm_health);
+}
+
/*
* 'struct pdsm_cmd_desc'
* Identifies supported PDSMs' expected length of in/out payloads
@@ -702,6 +951,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
.size_out = sizeof(struct nd_papr_pdsm_health),
.service = papr_pdsm_health,
},
+
+ [PAPR_PDSM_SMART_INJECT] = {
+ .size_in = sizeof(struct nd_papr_pdsm_smart_inject),
+ .size_out = sizeof(struct nd_papr_pdsm_smart_inject),
+ .service = papr_pdsm_smart_inject,
+ },
/* Empty */
[PAPR_PDSM_MAX] = {
.size_in = 0,
@@ -838,6 +1093,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
+static ssize_t health_bitmap_inject_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvdimm *dimm = to_nvdimm(dev);
+ struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+ return sprintf(buf, "%#llx\n",
+ READ_ONCE(p->health_bitmap_inject_mask));
+}
+
+static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject);
+
static ssize_t perf_stats_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -952,10 +1220,11 @@ static struct attribute *papr_nd_attributes[] = {
&dev_attr_flags.attr,
&dev_attr_perf_stats.attr,
&dev_attr_dirty_shutdown.attr,
+ &dev_attr_health_bitmap_inject.attr,
NULL,
};
-static struct attribute_group papr_nd_attribute_group = {
+static const struct attribute_group papr_nd_attribute_group = {
.name = "papr",
.is_visible = papr_nd_attribute_visible,
.attrs = papr_nd_attributes,
@@ -1236,6 +1505,7 @@ static int papr_scm_probe(struct platform_device *pdev)
goto err2;
platform_set_drvdata(pdev, p);
+ papr_scm_pmu_register(p);
return 0;
@@ -1254,6 +1524,11 @@ static int papr_scm_remove(struct platform_device *pdev)
nvdimm_bus_unregister(p->bus);
drc_pmem_unbind(p);
+
+ if (pdev->archdata.priv)
+ unregister_nvdimm_pmu(pdev->archdata.priv);
+
+ pdev->archdata.priv = NULL;
kfree(p->bus_desc.provider_name);
kfree(p);
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 3b6800f774c2..6e671c3809ec 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -14,7 +14,6 @@
#include <asm/eeh.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
#include <asm/ppc-pci.h>
#include <asm/pci.h>
#include "pseries.h"
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 90c9d3531694..4ba824568119 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb)
pseries_msi_free_domains(phb);
+ /* Keep a reference so phb isn't freed yet */
+ get_device(&host_bridge->dev);
+
/* Remove the PCI bus and unregister the bridge device from sysfs */
phb->bus = NULL;
pci_remove_bus(b);
@@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb)
* the pcibios_free_controller_deferred() callback;
* see pseries_root_bridge_prepare().
*/
+ put_device(&host_bridge->dev);
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
new file mode 100644
index 000000000000..f4b5b5a64db3
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * POWER LPAR Platform KeyStore(PLPKS)
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Nayna Jain <nayna@linux.ibm.com>
+ *
+ * Provides access to variables stored in Power LPAR Platform KeyStore(PLPKS).
+ */
+
+#define pr_fmt(fmt) "plpks: " fmt
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+
+#include "plpks.h"
+
+#define PKS_FW_OWNER 0x1
+#define PKS_BOOTLOADER_OWNER 0x2
+#define PKS_OS_OWNER 0x3
+
+#define LABEL_VERSION 0
+#define MAX_LABEL_ATTR_SIZE 16
+#define MAX_NAME_SIZE 239
+#define MAX_DATA_SIZE 4000
+
+#define PKS_FLUSH_MAX_TIMEOUT 5000 //msec
+#define PKS_FLUSH_SLEEP 10 //msec
+#define PKS_FLUSH_SLEEP_RANGE 400
+
+static u8 *ospassword;
+static u16 ospasswordlength;
+
+// Retrieved with H_PKS_GET_CONFIG
+static u16 maxpwsize;
+static u16 maxobjsize;
+
+struct plpks_auth {
+ u8 version;
+ u8 consumer;
+ __be64 rsvd0;
+ __be32 rsvd1;
+ __be16 passwordlength;
+ u8 password[];
+} __packed __aligned(16);
+
+struct label_attr {
+ u8 prefix[8];
+ u8 version;
+ u8 os;
+ u8 length;
+ u8 reserved[5];
+};
+
+struct label {
+ struct label_attr attr;
+ u8 name[MAX_NAME_SIZE];
+ size_t size;
+};
+
+static int pseries_status_to_err(int rc)
+{
+ int err;
+
+ switch (rc) {
+ case H_SUCCESS:
+ err = 0;
+ break;
+ case H_FUNCTION:
+ err = -ENXIO;
+ break;
+ case H_P1:
+ case H_P2:
+ case H_P3:
+ case H_P4:
+ case H_P5:
+ case H_P6:
+ err = -EINVAL;
+ break;
+ case H_NOT_FOUND:
+ err = -ENOENT;
+ break;
+ case H_BUSY:
+ err = -EBUSY;
+ break;
+ case H_AUTHORITY:
+ err = -EPERM;
+ break;
+ case H_NO_MEM:
+ err = -ENOMEM;
+ break;
+ case H_RESOURCE:
+ err = -EEXIST;
+ break;
+ case H_TOO_BIG:
+ err = -EFBIG;
+ break;
+ case H_STATE:
+ err = -EIO;
+ break;
+ case H_R_STATE:
+ err = -EIO;
+ break;
+ case H_IN_USE:
+ err = -EEXIST;
+ break;
+ case H_ABORTED:
+ err = -EINTR;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int plpks_gen_password(void)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ u8 *password, consumer = PKS_OS_OWNER;
+ int rc;
+
+ password = kzalloc(maxpwsize, GFP_KERNEL);
+ if (!password)
+ return -ENOMEM;
+
+ rc = plpar_hcall(H_PKS_GEN_PASSWORD, retbuf, consumer, 0,
+ virt_to_phys(password), maxpwsize);
+
+ if (!rc) {
+ ospasswordlength = maxpwsize;
+ ospassword = kzalloc(maxpwsize, GFP_KERNEL);
+ if (!ospassword) {
+ kfree(password);
+ return -ENOMEM;
+ }
+ memcpy(ospassword, password, ospasswordlength);
+ } else {
+ if (rc == H_IN_USE) {
+ pr_warn("Password is already set for POWER LPAR Platform KeyStore\n");
+ rc = 0;
+ } else {
+ goto out;
+ }
+ }
+out:
+ kfree(password);
+
+ return pseries_status_to_err(rc);
+}
+
+static struct plpks_auth *construct_auth(u8 consumer)
+{
+ struct plpks_auth *auth;
+
+ if (consumer > PKS_OS_OWNER)
+ return ERR_PTR(-EINVAL);
+
+ auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL);
+ if (!auth)
+ return ERR_PTR(-ENOMEM);
+
+ auth->version = 1;
+ auth->consumer = consumer;
+ auth->rsvd0 = 0;
+ auth->rsvd1 = 0;
+
+ if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) {
+ auth->passwordlength = 0;
+ return auth;
+ }
+
+ memcpy(auth->password, ospassword, ospasswordlength);
+
+ auth->passwordlength = cpu_to_be16(ospasswordlength);
+
+ return auth;
+}
+
+/**
+ * Label is combination of label attributes + name.
+ * Label attributes are used internally by kernel and not exposed to the user.
+ */
+static struct label *construct_label(char *component, u8 varos, u8 *name,
+ u16 namelen)
+{
+ struct label *label;
+ size_t slen;
+
+ if (!name || namelen > MAX_NAME_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ slen = strlen(component);
+ if (component && slen > sizeof(label->attr.prefix))
+ return ERR_PTR(-EINVAL);
+
+ label = kzalloc(sizeof(*label), GFP_KERNEL);
+ if (!label)
+ return ERR_PTR(-ENOMEM);
+
+ if (component)
+ memcpy(&label->attr.prefix, component, slen);
+
+ label->attr.version = LABEL_VERSION;
+ label->attr.os = varos;
+ label->attr.length = MAX_LABEL_ATTR_SIZE;
+ memcpy(&label->name, name, namelen);
+
+ label->size = sizeof(struct label_attr) + namelen;
+
+ return label;
+}
+
+static int _plpks_get_config(void)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ struct {
+ u8 version;
+ u8 flags;
+ __be32 rsvd0;
+ __be16 maxpwsize;
+ __be16 maxobjlabelsize;
+ __be16 maxobjsize;
+ __be32 totalsize;
+ __be32 usedspace;
+ __be32 supportedpolicies;
+ __be64 rsvd1;
+ } __packed config;
+ size_t size;
+ int rc;
+
+ size = sizeof(config);
+
+ rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(&config), size);
+
+ if (rc != H_SUCCESS)
+ return pseries_status_to_err(rc);
+
+ maxpwsize = be16_to_cpu(config.maxpwsize);
+ maxobjsize = be16_to_cpu(config.maxobjsize);
+
+ return 0;
+}
+
+static int plpks_confirm_object_flushed(struct label *label,
+ struct plpks_auth *auth)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ u64 timeout = 0;
+ u8 status;
+ int rc;
+
+ do {
+ rc = plpar_hcall(H_PKS_CONFIRM_OBJECT_FLUSHED, retbuf,
+ virt_to_phys(auth), virt_to_phys(label),
+ label->size);
+
+ status = retbuf[0];
+ if (rc) {
+ if (rc == H_NOT_FOUND && status == 1)
+ rc = 0;
+ break;
+ }
+
+ if (!rc && status == 1)
+ break;
+
+ usleep_range(PKS_FLUSH_SLEEP,
+ PKS_FLUSH_SLEEP + PKS_FLUSH_SLEEP_RANGE);
+ timeout = timeout + PKS_FLUSH_SLEEP;
+ } while (timeout < PKS_FLUSH_MAX_TIMEOUT);
+
+ rc = pseries_status_to_err(rc);
+
+ return rc;
+}
+
+int plpks_write_var(struct plpks_var var)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ struct plpks_auth *auth;
+ struct label *label;
+ int rc;
+
+ if (!var.component || !var.data || var.datalen <= 0 ||
+ var.namelen > MAX_NAME_SIZE || var.datalen > MAX_DATA_SIZE)
+ return -EINVAL;
+
+ if (var.policy & SIGNEDUPDATE)
+ return -EINVAL;
+
+ auth = construct_auth(PKS_OS_OWNER);
+ if (IS_ERR(auth))
+ return PTR_ERR(auth);
+
+ label = construct_label(var.component, var.os, var.name, var.namelen);
+ if (IS_ERR(label)) {
+ rc = PTR_ERR(label);
+ goto out;
+ }
+
+ rc = plpar_hcall(H_PKS_WRITE_OBJECT, retbuf, virt_to_phys(auth),
+ virt_to_phys(label), label->size, var.policy,
+ virt_to_phys(var.data), var.datalen);
+
+ if (!rc)
+ rc = plpks_confirm_object_flushed(label, auth);
+
+ if (rc)
+ pr_err("Failed to write variable %s for component %s with error %d\n",
+ var.name, var.component, rc);
+
+ rc = pseries_status_to_err(rc);
+ kfree(label);
+out:
+ kfree(auth);
+
+ return rc;
+}
+
+int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ struct plpks_auth *auth;
+ struct label *label;
+ int rc;
+
+ if (!component || vname.namelen > MAX_NAME_SIZE)
+ return -EINVAL;
+
+ auth = construct_auth(PKS_OS_OWNER);
+ if (IS_ERR(auth))
+ return PTR_ERR(auth);
+
+ label = construct_label(component, varos, vname.name, vname.namelen);
+ if (IS_ERR(label)) {
+ rc = PTR_ERR(label);
+ goto out;
+ }
+
+ rc = plpar_hcall(H_PKS_REMOVE_OBJECT, retbuf, virt_to_phys(auth),
+ virt_to_phys(label), label->size);
+
+ if (!rc)
+ rc = plpks_confirm_object_flushed(label, auth);
+
+ if (rc)
+ pr_err("Failed to remove variable %s for component %s with error %d\n",
+ vname.name, component, rc);
+
+ rc = pseries_status_to_err(rc);
+ kfree(label);
+out:
+ kfree(auth);
+
+ return rc;
+}
+
+static int plpks_read_var(u8 consumer, struct plpks_var *var)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ struct plpks_auth *auth;
+ struct label *label;
+ u8 *output;
+ int rc;
+
+ if (var->namelen > MAX_NAME_SIZE)
+ return -EINVAL;
+
+ auth = construct_auth(PKS_OS_OWNER);
+ if (IS_ERR(auth))
+ return PTR_ERR(auth);
+
+ label = construct_label(var->component, var->os, var->name,
+ var->namelen);
+ if (IS_ERR(label)) {
+ rc = PTR_ERR(label);
+ goto out_free_auth;
+ }
+
+ output = kzalloc(maxobjsize, GFP_KERNEL);
+ if (!output) {
+ rc = -ENOMEM;
+ goto out_free_label;
+ }
+
+ rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+ virt_to_phys(label), label->size, virt_to_phys(output),
+ maxobjsize);
+
+ if (rc != H_SUCCESS) {
+ pr_err("Failed to read variable %s for component %s with error %d\n",
+ var->name, var->component, rc);
+ rc = pseries_status_to_err(rc);
+ goto out_free_output;
+ }
+
+ if (var->datalen == 0 || var->datalen > retbuf[0])
+ var->datalen = retbuf[0];
+
+ var->data = kzalloc(var->datalen, GFP_KERNEL);
+ if (!var->data) {
+ rc = -ENOMEM;
+ goto out_free_output;
+ }
+ var->policy = retbuf[1];
+
+ memcpy(var->data, output, var->datalen);
+ rc = 0;
+
+out_free_output:
+ kfree(output);
+out_free_label:
+ kfree(label);
+out_free_auth:
+ kfree(auth);
+
+ return rc;
+}
+
+int plpks_read_os_var(struct plpks_var *var)
+{
+ return plpks_read_var(PKS_OS_OWNER, var);
+}
+
+int plpks_read_fw_var(struct plpks_var *var)
+{
+ return plpks_read_var(PKS_FW_OWNER, var);
+}
+
+int plpks_read_bootloader_var(struct plpks_var *var)
+{
+ return plpks_read_var(PKS_BOOTLOADER_OWNER, var);
+}
+
+static __init int pseries_plpks_init(void)
+{
+ int rc;
+
+ rc = _plpks_get_config();
+
+ if (rc) {
+ pr_err("POWER LPAR Platform KeyStore is not supported or enabled\n");
+ return rc;
+ }
+
+ rc = plpks_gen_password();
+ if (rc)
+ pr_err("Failed setting POWER LPAR Platform KeyStore Password\n");
+ else
+ pr_info("POWER LPAR Platform KeyStore initialized successfully\n");
+
+ return rc;
+}
+machine_arch_initcall(pseries, pseries_plpks_init);
diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h
new file mode 100644
index 000000000000..c6a291367bb1
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/plpks.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Nayna Jain <nayna@linux.ibm.com>
+ *
+ * Platform keystore for pseries LPAR(PLPKS).
+ */
+
+#ifndef _PSERIES_PLPKS_H
+#define _PSERIES_PLPKS_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+#define OSSECBOOTAUDIT 0x40000000
+#define OSSECBOOTENFORCE 0x20000000
+#define WORLDREADABLE 0x08000000
+#define SIGNEDUPDATE 0x01000000
+
+#define PLPKS_VAR_LINUX 0x01
+#define PLPKS_VAR_COMMON 0x04
+
+struct plpks_var {
+ char *component;
+ u8 *name;
+ u8 *data;
+ u32 policy;
+ u16 namelen;
+ u16 datalen;
+ u8 os;
+};
+
+struct plpks_var_name {
+ u8 *name;
+ u16 namelen;
+};
+
+struct plpks_var_name_list {
+ u32 varcount;
+ struct plpks_var_name varlist[];
+};
+
+/**
+ * Writes the specified var and its data to PKS.
+ * Any caller of PKS driver should present a valid component type for
+ * their variable.
+ */
+int plpks_write_var(struct plpks_var var);
+
+/**
+ * Removes the specified var and its data from PKS.
+ */
+int plpks_remove_var(char *component, u8 varos,
+ struct plpks_var_name vname);
+
+/**
+ * Returns the data for the specified os variable.
+ */
+int plpks_read_os_var(struct plpks_var *var);
+
+/**
+ * Returns the data for the specified firmware variable.
+ */
+int plpks_read_fw_var(struct plpks_var *var);
+
+/**
+ * Returns the data for the specified bootloader variable.
+ */
+int plpks_read_bootloader_var(struct plpks_var *var);
+
+#endif
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index 439ac72c2470..3c290b9ed01b 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -15,7 +15,6 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c
index ee343ec6ab94..3676cb297767 100644
--- a/arch/powerpc/platforms/pseries/power.c
+++ b/arch/powerpc/platforms/pseries/power.c
@@ -51,7 +51,7 @@ static struct attribute *g[] = {
NULL,
};
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
.attrs = g,
};
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 3544778e06d0..1d75b7742ef0 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -11,7 +11,7 @@
struct device_node;
-extern void request_event_sources_irqs(struct device_node *np,
+void __init request_event_sources_irqs(struct device_node *np,
irq_handler_t handler, const char *name);
#include <linux/of.h>
@@ -21,6 +21,7 @@ struct pt_regs;
extern int pSeries_system_reset_exception(struct pt_regs *regs);
extern int pSeries_machine_check_exception(struct pt_regs *regs);
extern long pseries_machine_check_realmode(struct pt_regs *regs);
+void pSeries_machine_check_log_err(void);
#ifdef CONFIG_SMP
extern void smp_init_pseries(void);
@@ -37,6 +38,7 @@ static inline void smp_init_pseries(void) { }
#endif
extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary);
+void pseries_machine_kexec(struct kimage *image);
extern void pSeries_final_fixup(void);
@@ -113,6 +115,13 @@ int dlpar_workqueue_init(void);
extern u32 pseries_security_flavor;
void pseries_setup_security_mitigations(void);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
void pseries_lpar_read_hblkrm_characteristics(void);
+#else
+static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
+#endif
+
+void pseries_rng_init(void);
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 56092dccfdb8..f12516c3998c 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);
static int ras_check_exception_token;
-static void mce_process_errlog_event(struct irq_work *work);
-static struct irq_work mce_errlog_process_work = {
- .func = mce_process_errlog_event,
-};
-
#define EPOW_SENSOR_TOKEN 9
#define EPOW_SENSOR_INDEX 0
@@ -60,11 +55,17 @@ struct pseries_mc_errorlog {
* XX 2: Reserved.
* XXX 3: Type of UE error.
*
- * For error_type != MC_ERROR_TYPE_UE
+ * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
* XXXXXXXX
* X 1: Effective address provided.
* XXXXX 5: Reserved.
* XX 2: Type of SLB/ERAT/TLB error.
+ *
+ * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
+ * XXXXXXXX
+ * X 1: Error causing address provided.
+ * XXX 3: Type of error.
+ * XXXX 4: Reserved.
*/
u8 sub_err_type;
u8 reserved_1[6];
@@ -80,6 +81,7 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TYPE_TLB 0x04
#define MC_ERROR_TYPE_D_CACHE 0x05
#define MC_ERROR_TYPE_I_CACHE 0x07
+#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08
/* RTAS pseries MCE error sub types */
#define MC_ERROR_UE_INDETERMINATE 0
@@ -90,6 +92,7 @@ struct pseries_mc_errorlog {
#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
#define UE_LOGICAL_ADDR_PROVIDED 0x20
+#define MC_EFFECTIVE_ADDR_PROVIDED 0x80
#define MC_ERROR_SLB_PARITY 0
#define MC_ERROR_SLB_MULTIHIT 1
@@ -103,6 +106,9 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TLB_MULTIHIT 2
#define MC_ERROR_TLB_INDETERMINATE 3
+#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0
+#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1
+
static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
{
switch (mlog->error_type) {
@@ -112,6 +118,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
case MC_ERROR_TYPE_ERAT:
case MC_ERROR_TYPE_TLB:
return (mlog->sub_err_type & 0x03);
+ case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+ return (mlog->sub_err_type & 0x70) >> 4;
default:
return 0;
}
@@ -526,6 +534,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type)
disposition = RTAS_DISP_FULLY_RECOVERED;
break;
case MC_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* Store the old slb content in paca before flushing.
* Print this when we go to virtual mode.
@@ -538,6 +547,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
+#endif
break;
default:
break;
@@ -656,7 +666,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_ERAT:
@@ -673,7 +683,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_TLB:
@@ -690,7 +700,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
break;
}
- if (mce_log->sub_err_type & 0x80)
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_D_CACHE:
@@ -699,6 +709,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
case MC_ERROR_TYPE_I_CACHE:
mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
break;
+ case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+ mce_err.error_type = MCE_ERROR_TYPE_RA;
+ switch (err_sub_type) {
+ case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
+ mce_err.u.ra_error_type =
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+ break;
+ case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
+ mce_err.u.ra_error_type =
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+ break;
+ }
+ if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
case MC_ERROR_TYPE_UNKNOWN:
default:
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
@@ -715,7 +740,6 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log = NULL;
int disposition = rtas_error_disposition(errp);
- unsigned long msr;
u8 error_type;
if (!rtas_error_extended(errp))
@@ -729,40 +753,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
error_type = mce_log->error_type;
disposition = mce_handle_err_realmode(disposition, error_type);
-
- /*
- * Enable translation as we will be accessing per-cpu variables
- * in save_mce_event() which may fall outside RMO region, also
- * leave it enabled because subsequently we will be queuing work
- * to workqueues where again per-cpu variables accessed, besides
- * fwnmi_release_errinfo() crashes when called in realmode on
- * pseries.
- * Note: All the realmode handling like flushing SLB entries for
- * SLB multihit is done by now.
- */
out:
- msr = mfmsr();
- mtmsr(msr | MSR_IR | MSR_DR);
-
disposition = mce_handle_err_virtmode(regs, errp, mce_log,
disposition);
-
- /*
- * Queue irq work to log this rtas event later.
- * irq_work_queue uses per-cpu variables, so do this in virt
- * mode as well.
- */
- irq_work_queue(&mce_errlog_process_work);
-
- mtmsr(msr);
-
return disposition;
}
/*
* Process MCE rtas errlog event.
*/
-static void mce_process_errlog_event(struct irq_work *work)
+void pSeries_machine_check_log_err(void)
{
struct rtas_error_log *err;
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 7f7369fec46b..599bd2c78514 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -10,10 +10,10 @@
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/proc_fs.h>
+#include <linux/security.h>
#include <linux/slab.h>
#include <linux/of.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
#include <asm/mmu.h>
@@ -362,6 +362,10 @@ static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t coun
char *kbuf;
char *tmp;
+ rv = security_locked_down(LOCKDOWN_DEVICE_TREE);
+ if (rv)
+ return rv;
+
kbuf = memdup_user_nul(buf, count);
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
index 6268545947b8..6ddfdeaace9e 100644
--- a/arch/powerpc/platforms/pseries/rng.c
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -10,6 +10,7 @@
#include <asm/archrandom.h>
#include <asm/machdep.h>
#include <asm/plpar_wrappers.h>
+#include "pseries.h"
static int pseries_get_random_long(unsigned long *v)
@@ -24,19 +25,13 @@ static int pseries_get_random_long(unsigned long *v)
return 0;
}
-static __init int rng_init(void)
+void __init pseries_rng_init(void)
{
struct device_node *dn;
dn = of_find_compatible_node(NULL, NULL, "ibm,random");
if (!dn)
- return -ENODEV;
-
- pr_info("Registering arch random hook.\n");
-
+ return;
ppc_md.get_random_seed = pseries_get_random_long;
-
of_node_put(dn);
- return 0;
}
-machine_subsys_initcall(pseries, rng_init);
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
index f8f73b47b107..b5853e9fcc3c 100644
--- a/arch/powerpc/platforms/pseries/rtas-fadump.c
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -13,9 +13,10 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <asm/page.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
@@ -39,7 +40,7 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
* This function is called in the capture kernel to get configuration details
* setup in the first kernel and passed to the f/w.
*/
-static void rtas_fadump_get_config(struct fw_dump *fadump_conf,
+static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf,
const struct rtas_fadump_mem_struct *fdm)
{
fadump_conf->boot_mem_addr[0] =
@@ -108,6 +109,12 @@ static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
fdm.hpte_region.destination_address = cpu_to_be64(addr);
addr += fadump_conf->hpte_region_size;
+ /*
+ * Align boot memory area destination address to page boundary to
+ * be able to mmap read this area in the vmcore.
+ */
+ addr = PAGE_ALIGN(addr);
+
/* RMA region section */
fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
fdm.rmr_region.source_data_type =
@@ -247,7 +254,7 @@ static inline int rtas_fadump_gpr_index(u64 id)
return i;
}
-static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
{
int i;
@@ -272,7 +279,7 @@ static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val
regs->dsisr = (unsigned long)reg_val;
}
-static struct rtas_fadump_reg_entry*
+static struct rtas_fadump_reg_entry* __init
rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
struct pt_regs *regs)
{
@@ -351,7 +358,7 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
/* Lower 4 bytes of reg_value contains logical cpu id */
cpu = (be64_to_cpu(reg_entry->reg_value) &
RTAS_FADUMP_CPU_ID_MASK);
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_mask)) {
RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
continue;
}
@@ -462,10 +469,10 @@ static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
be64_to_cpu(fdm_ptr->rmr_region.source_len),
be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
- /* Dump is active. Show reserved area start address. */
+ /* Dump is active. Show preserved area start address. */
if (fdm_active) {
- seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n",
- fadump_conf->reserve_dump_area_start);
+ seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
+ fadump_conf->boot_mem_top);
}
}
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
deleted file mode 100644
index 2879c4f0ceb7..000000000000
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ /dev/null
@@ -1,195 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * c 2001 PPC 64 Team, IBM Corp
- *
- * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com>
- *
- * When ppc64 hardware fails the service processor dumps internal state
- * of the system. After a reboot the operating system can access a dump
- * of this data using this driver. A dump exists if the device-tree
- * /chosen/ibm,scan-log-data property exists.
- *
- * This driver exports /proc/powerpc/scan-log-dump which can be read.
- * The driver supports only sequential reads.
- *
- * The driver looks at a write to the driver for the single word "reset".
- * If given, the driver will reset the scanlog so the platform can free it.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <asm/rtas.h>
-#include <asm/prom.h>
-
-#define MODULE_VERS "1.0"
-#define MODULE_NAME "scanlog"
-
-/* Status returns from ibm,scan-log-dump */
-#define SCANLOG_COMPLETE 0
-#define SCANLOG_HWERROR -1
-#define SCANLOG_CONTINUE 1
-
-
-static unsigned int ibm_scan_log_dump; /* RTAS token */
-static unsigned int *scanlog_buffer; /* The data buffer */
-
-static ssize_t scanlog_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- unsigned int *data = scanlog_buffer;
- int status;
- unsigned long len, off;
- unsigned int wait_time;
-
- if (count > RTAS_DATA_BUF_SIZE)
- count = RTAS_DATA_BUF_SIZE;
-
- if (count < 1024) {
- /* This is the min supported by this RTAS call. Rather
- * than do all the buffering we insist the user code handle
- * larger reads. As long as cp works... :)
- */
- printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count);
- return -EINVAL;
- }
-
- if (!access_ok(buf, count))
- return -EFAULT;
-
- for (;;) {
- wait_time = 500; /* default wait if no data */
- spin_lock(&rtas_data_buf_lock);
- memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE);
- status = rtas_call(ibm_scan_log_dump, 2, 1, NULL,
- (u32) __pa(rtas_data_buf), (u32) count);
- memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
- spin_unlock(&rtas_data_buf_lock);
-
- pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \
- "data[2]=%x\n", status, data[0], data[1], data[2]);
- switch (status) {
- case SCANLOG_COMPLETE:
- pr_debug("scanlog: hit eof\n");
- return 0;
- case SCANLOG_HWERROR:
- pr_debug("scanlog: hardware error reading data\n");
- return -EIO;
- case SCANLOG_CONTINUE:
- /* We may or may not have data yet */
- len = data[1];
- off = data[2];
- if (len > 0) {
- if (copy_to_user(buf, ((char *)data)+off, len))
- return -EFAULT;
- return len;
- }
- /* Break to sleep default time */
- break;
- default:
- /* Assume extended busy */
- wait_time = rtas_busy_delay_time(status);
- if (!wait_time) {
- printk(KERN_ERR "scanlog: unknown error " \
- "from rtas: %d\n", status);
- return -EIO;
- }
- }
- /* Apparently no data yet. Wait and try again. */
- msleep_interruptible(wait_time);
- }
- /*NOTREACHED*/
-}
-
-static ssize_t scanlog_write(struct file * file, const char __user * buf,
- size_t count, loff_t *ppos)
-{
- char stkbuf[20];
- int status;
-
- if (count > 19) count = 19;
- if (copy_from_user (stkbuf, buf, count)) {
- return -EFAULT;
- }
- stkbuf[count] = 0;
-
- if (buf) {
- if (strncmp(stkbuf, "reset", 5) == 0) {
- pr_debug("scanlog: reset scanlog\n");
- status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
- pr_debug("scanlog: rtas returns %d\n", status);
- }
- }
- return count;
-}
-
-static int scanlog_open(struct inode * inode, struct file * file)
-{
- unsigned int *data = scanlog_buffer;
-
- if (data[0] != 0) {
- /* This imperfect test stops a second copy of the
- * data (or a reset while data is being copied)
- */
- return -EBUSY;
- }
-
- data[0] = 0; /* re-init so we restart the scan */
-
- return 0;
-}
-
-static int scanlog_release(struct inode * inode, struct file * file)
-{
- unsigned int *data = scanlog_buffer;
-
- data[0] = 0;
- return 0;
-}
-
-static const struct proc_ops scanlog_proc_ops = {
- .proc_read = scanlog_read,
- .proc_write = scanlog_write,
- .proc_open = scanlog_open,
- .proc_release = scanlog_release,
- .proc_lseek = noop_llseek,
-};
-
-static int __init scanlog_init(void)
-{
- struct proc_dir_entry *ent;
- int err = -ENOMEM;
-
- ibm_scan_log_dump = rtas_token("ibm,scan-log-dump");
- if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE)
- return -ENODEV;
-
- /* Ideally we could allocate a buffer < 4G */
- scanlog_buffer = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
- if (!scanlog_buffer)
- goto err;
-
- ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL,
- &scanlog_proc_ops);
- if (!ent)
- goto err;
- return 0;
-err:
- kfree(scanlog_buffer);
- return err;
-}
-
-static void __exit scanlog_cleanup(void)
-{
- remove_proc_entry("powerpc/rtas/scan-log-dump", NULL);
- kfree(scanlog_buffer);
-}
-
-module_init(scanlog_init);
-module_exit(scanlog_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8a62af5b9c24..8ef3270515a9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -14,6 +14,7 @@
#include <linux/cpu.h>
#include <linux/errno.h>
+#include <linux/platform_device.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -36,14 +37,15 @@
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/of_pci.h>
#include <linux/memblock.h>
#include <linux/swiotlb.h>
+#include <linux/seq_buf.h>
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/pci-bridge.h>
#include <asm/iommu.h>
@@ -72,12 +74,27 @@
#include <asm/svm.h>
#include <asm/dtl.h>
#include <asm/hvconsole.h>
+#include <asm/setup.h>
#include "pseries.h"
DEFINE_STATIC_KEY_FALSE(shared_processor);
EXPORT_SYMBOL(shared_processor);
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+#endif
+
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -112,7 +129,7 @@ static void __init fwnmi_init(void)
u8 *mce_data_buf;
unsigned int i;
int nr_cpus = num_possible_cpus();
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct slb_entry *slb_ptr;
size_t size;
#endif
@@ -152,7 +169,7 @@ static void __init fwnmi_init(void)
(RTAS_ERROR_LOG_MAX * i);
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!radix_enabled()) {
/* Allocate per cpu area to save old slb contents during MCE */
size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
@@ -169,6 +186,18 @@ static void __init fwnmi_init(void)
#endif
}
+/*
+ * Affix a device for the first timer to the platform bus if
+ * we have firmware support for the H_WATCHDOG hypercall.
+ */
+static __init int pseries_wdt_init(void)
+{
+ if (firmware_has_feature(FW_FEATURE_WATCHDOG))
+ platform_device_register_simple("pseries-wdt", 0, NULL, 0);
+ return 0;
+}
+machine_subsys_initcall(pseries, pseries_wdt_init);
+
static void pseries_8259_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
@@ -353,6 +382,14 @@ static void pseries_lpar_idle(void)
pseries_idle_epilog();
}
+static bool pseries_reloc_on_exception_enabled;
+
+bool pseries_reloc_on_exception(void)
+{
+ return pseries_reloc_on_exception_enabled;
+}
+EXPORT_SYMBOL_GPL(pseries_reloc_on_exception);
+
/*
* Enable relocation on during exceptions. This has partition wide scope and
* may take a while to complete, if it takes longer than one second we will
@@ -377,6 +414,7 @@ bool pseries_enable_reloc_on_exc(void)
" on exceptions: %ld\n", rc);
return false;
}
+ pseries_reloc_on_exception_enabled = true;
return true;
}
@@ -404,22 +442,14 @@ void pseries_disable_reloc_on_exc(void)
break;
mdelay(get_longbusy_msecs(rc));
}
- if (rc != H_SUCCESS)
+ if (rc == H_SUCCESS)
+ pseries_reloc_on_exception_enabled = false;
+ else
pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
rc);
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
-#ifdef CONFIG_KEXEC_CORE
-static void pSeries_machine_kexec(struct kimage *image)
-{
- if (firmware_has_feature(FW_FEATURE_SET_MODE))
- pseries_disable_reloc_on_exc();
-
- default_machine_kexec(image);
-}
-#endif
-
#ifdef __LITTLE_ENDIAN__
void pseries_big_endian_exceptions(void)
{
@@ -447,7 +477,7 @@ void pseries_big_endian_exceptions(void)
panic("Could not enable big endian exceptions");
}
-void pseries_little_endian_exceptions(void)
+void __init pseries_little_endian_exceptions(void)
{
long rc;
@@ -647,7 +677,7 @@ static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
*/
num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
if (resno >= num_res)
- return 0; /* or an errror */
+ return 0; /* or an error */
i = START_OF_ENTRIES + NEXT_ENTRY * resno;
switch (value) {
@@ -751,7 +781,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
if (!pdev->is_physfn)
return;
- /*Firmware must support open sriov otherwise dont configure*/
+ /*Firmware must support open sriov otherwise don't configure*/
indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
if (indexes)
of_pci_parse_iov_addrs(pdev, indexes);
@@ -801,7 +831,8 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_security_mitigations();
- pseries_lpar_read_hblkrm_characteristics();
+ if (!radix_enabled())
+ pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
@@ -818,6 +849,11 @@ static void __init pSeries_setup_arch(void)
if (lppaca_shared_proc(get_lppaca())) {
static_branch_enable(&shared_processor);
pv_spinlocks_init();
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+#endif
}
ppc_md.power_save = pseries_lpar_idle;
@@ -836,9 +872,7 @@ static void __init pSeries_setup_arch(void)
}
ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
-
- if (swiotlb_force == SWIOTLB_FORCE)
- ppc_swiotlb_enable = 1;
+ pseries_rng_init();
}
static void pseries_panic(char *str)
@@ -905,7 +939,7 @@ void pSeries_coalesce_init(void)
* fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
* handle that here. (Stolen from parse_system_parameter_string)
*/
-static void pSeries_cmo_feature_init(void)
+static void __init pSeries_cmo_feature_init(void)
{
char *ptr, *key, *value, *end;
int call_status;
@@ -978,6 +1012,33 @@ static void pSeries_cmo_feature_init(void)
pr_debug(" <- fw_cmo_feature_init()\n");
}
+static void __init pseries_add_hw_description(void)
+{
+ struct device_node *dn;
+ const char *s;
+
+ dn = of_find_node_by_path("/openprom");
+ if (dn) {
+ if (of_property_read_string(dn, "model", &s) == 0)
+ seq_buf_printf(&ppc_hw_desc, "of:%s ", s);
+
+ of_node_put(dn);
+ }
+
+ dn = of_find_node_by_path("/hypervisor");
+ if (dn) {
+ if (of_property_read_string(dn, "compatible", &s) == 0)
+ seq_buf_printf(&ppc_hw_desc, "hv:%s ", s);
+
+ of_node_put(dn);
+ return;
+ }
+
+ if (of_property_read_bool(of_root, "ibm,powervm-partition") ||
+ of_property_read_bool(of_root, "ibm,fw-net-version"))
+ seq_buf_printf(&ppc_hw_desc, "hv:phyp ");
+}
+
/*
* Early initialization. Relocation is on but do not reference unbolted pages
*/
@@ -985,6 +1046,8 @@ static void __init pseries_init(void)
{
pr_debug(" -> pseries_init()\n");
+ pseries_add_hw_description();
+
#ifdef CONFIG_HVC_CONSOLE
if (firmware_has_feature(FW_FEATURE_LPAR))
hvc_vio_init_early();
@@ -1084,8 +1147,9 @@ define_machine(pseries) {
.system_reset_exception = pSeries_system_reset_exception,
.machine_check_early = pseries_machine_check_realmode,
.machine_check_exception = pSeries_machine_check_exception,
+ .machine_check_log_err = pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
- .machine_kexec = pSeries_machine_kexec,
+ .machine_kexec = pseries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index f47429323eee..fd2174edfa1d 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -27,7 +27,6 @@
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/paca.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index c5228f4969eb..3b4045d508ec 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -28,7 +28,7 @@ static int __init init_svm(void)
* need to use the SWIOTLB buffer for DMA even if dma_capable() says
* otherwise.
*/
- swiotlb_force = SWIOTLB_FORCE;
+ ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE;
/* Share the SWIOTLB buffer with the host. */
swiotlb_update_mem_attributes();
@@ -37,30 +37,6 @@ static int __init init_svm(void)
}
machine_early_initcall(pseries, init_svm);
-/*
- * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it
- * can allocate the buffer anywhere in memory. Since the hypervisor doesn't have
- * any addressing limitation, we don't need to allocate it in low addresses.
- */
-void __init svm_swiotlb_init(void)
-{
- unsigned char *vstart;
- unsigned long bytes, io_tlb_nslabs;
-
- io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
- vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
- if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
- return;
-
-
- memblock_free(vstart, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
- panic("SVM: Cannot allocate SWIOTLB buffer");
-}
-
int set_memory_encrypted(unsigned long addr, int numpages)
{
if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
new file mode 100644
index 000000000000..f9f682724e77
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2022-23 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "vas.h"
+
+#ifdef CONFIG_SYSFS
+static struct kobject *pseries_vas_kobj;
+static struct kobject *gzip_caps_kobj;
+
+struct vas_caps_entry {
+ struct kobject kobj;
+ struct vas_cop_feat_caps *caps;
+};
+
+#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj)
+
+/*
+ * This function is used to get the notification from the drmgr when
+ * QoS credits are changed.
+ */
+static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps,
+ const char *buf, size_t count)
+{
+ int err;
+ u16 creds;
+
+ err = kstrtou16(buf, 0, &creds);
+ /*
+ * The user space interface from the management console
+ * notifies OS with the new QoS credits and then the
+ * hypervisor. So OS has to use this new credits value
+ * and reconfigure VAS windows (close or reopen depends
+ * on the credits available) instead of depending on VAS
+ * QoS capabilities from the hypervisor.
+ */
+ if (!err)
+ err = vas_reconfig_capabilties(caps->win_type, creds);
+
+ if (err)
+ return -EINVAL;
+
+ pr_info("Set QoS total credits %u\n", creds);
+
+ return count;
+}
+
+#define sysfs_caps_entry_read(_name) \
+static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \
+{ \
+ return sprintf(buf, "%d\n", atomic_read(&caps->_name)); \
+}
+
+struct vas_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct vas_cop_feat_caps *, char *);
+ ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t);
+};
+
+#define VAS_ATTR_RO(_name) \
+ sysfs_caps_entry_read(_name); \
+ static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \
+ 0444, _name##_show, NULL);
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities
+ * This directory contains the following VAS GZIP capabilities
+ * for the default credit type.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_total_credits
+ * Total number of default credits assigned to the LPAR which
+ * can be changed with DLPAR operation.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_used_credits
+ * Number of credits used by the user space. One credit will
+ * be assigned for each window open.
+ *
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities
+ * This directory contains the following VAS GZIP capabilities
+ * for the Quality of Service (QoS) credit type.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_total_credits
+ * Total number of QoS credits assigned to the LPAR. The user
+ * has to define this value using HMC interface. It can be
+ * changed dynamically by the user.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_used_credits
+ * Number of credits used by the user space.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/update_total_credits
+ * Update total QoS credits dynamically
+ */
+
+VAS_ATTR_RO(nr_total_credits);
+VAS_ATTR_RO(nr_used_credits);
+
+static struct vas_sysfs_entry update_total_credits_attribute =
+ __ATTR(update_total_credits, 0200, NULL, update_total_credits_store);
+
+static struct attribute *vas_def_capab_attrs[] = {
+ &nr_total_credits_attribute.attr,
+ &nr_used_credits_attribute.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vas_def_capab);
+
+static struct attribute *vas_qos_capab_attrs[] = {
+ &nr_total_credits_attribute.attr,
+ &nr_used_credits_attribute.attr,
+ &update_total_credits_attribute.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vas_qos_capab);
+
+static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct vas_caps_entry *centry;
+ struct vas_cop_feat_caps *caps;
+ struct vas_sysfs_entry *entry;
+
+ centry = to_caps_entry(kobj);
+ caps = centry->caps;
+ entry = container_of(attr, struct vas_sysfs_entry, attr);
+
+ if (!entry->show)
+ return -EIO;
+
+ return entry->show(caps, buf);
+}
+
+static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vas_caps_entry *centry;
+ struct vas_cop_feat_caps *caps;
+ struct vas_sysfs_entry *entry;
+
+ centry = to_caps_entry(kobj);
+ caps = centry->caps;
+ entry = container_of(attr, struct vas_sysfs_entry, attr);
+ if (!entry->store)
+ return -EIO;
+
+ return entry->store(caps, buf, count);
+}
+
+static void vas_type_release(struct kobject *kobj)
+{
+ struct vas_caps_entry *centry = to_caps_entry(kobj);
+ kfree(centry);
+}
+
+static const struct sysfs_ops vas_sysfs_ops = {
+ .show = vas_type_show,
+ .store = vas_type_store,
+};
+
+static struct kobj_type vas_def_attr_type = {
+ .release = vas_type_release,
+ .sysfs_ops = &vas_sysfs_ops,
+ .default_groups = vas_def_capab_groups,
+};
+
+static struct kobj_type vas_qos_attr_type = {
+ .release = vas_type_release,
+ .sysfs_ops = &vas_sysfs_ops,
+ .default_groups = vas_qos_capab_groups,
+};
+
+static char *vas_caps_kobj_name(struct vas_caps_entry *centry,
+ struct kobject **kobj)
+{
+ struct vas_cop_feat_caps *caps = centry->caps;
+
+ if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) {
+ kobject_init(&centry->kobj, &vas_qos_attr_type);
+ *kobj = gzip_caps_kobj;
+ return "qos_capabilities";
+ } else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) {
+ kobject_init(&centry->kobj, &vas_def_attr_type);
+ *kobj = gzip_caps_kobj;
+ return "default_capabilities";
+ } else
+ return "Unknown";
+}
+
+/*
+ * Add feature specific capability dir entry.
+ * Ex: VDefGzip or VQosGzip
+ */
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+ struct vas_caps_entry *centry;
+ struct kobject *kobj = NULL;
+ int ret = 0;
+ char *name;
+
+ centry = kzalloc(sizeof(*centry), GFP_KERNEL);
+ if (!centry)
+ return -ENOMEM;
+
+ centry->caps = caps;
+ name = vas_caps_kobj_name(centry, &kobj);
+
+ if (kobj) {
+ ret = kobject_add(&centry->kobj, kobj, "%s", name);
+
+ if (ret) {
+ pr_err("VAS: sysfs kobject add / event failed %d\n",
+ ret);
+ kobject_put(&centry->kobj);
+ }
+ }
+
+ return ret;
+}
+
+static struct miscdevice vas_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "vas",
+};
+
+/*
+ * Add VAS and VasCaps (overall capabilities) dir entries.
+ */
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+ int ret;
+
+ ret = misc_register(&vas_miscdev);
+ if (ret < 0) {
+ pr_err("%s: register vas misc device failed\n", __func__);
+ return ret;
+ }
+
+ /*
+ * The hypervisor does not expose multiple VAS instances, but can
+ * see multiple VAS instances on PowerNV. So create 'vas0' directory
+ * on pseries.
+ */
+ pseries_vas_kobj = kobject_create_and_add("vas0",
+ &vas_miscdev.this_device->kobj);
+ if (!pseries_vas_kobj) {
+ misc_deregister(&vas_miscdev);
+ pr_err("Failed to create VAS sysfs entry\n");
+ return -ENOMEM;
+ }
+
+ if ((vas_caps->feat_type & VAS_GZIP_QOS_FEAT_BIT) ||
+ (vas_caps->feat_type & VAS_GZIP_DEF_FEAT_BIT)) {
+ gzip_caps_kobj = kobject_create_and_add("gzip",
+ pseries_vas_kobj);
+ if (!gzip_caps_kobj) {
+ pr_err("Failed to create VAS GZIP capability entry\n");
+ kobject_put(pseries_vas_kobj);
+ misc_deregister(&vas_miscdev);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+#else
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+ return 0;
+}
+
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+ return 0;
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index b043e3936d21..4ad6e510d405 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -16,6 +16,7 @@
#include <asm/machdep.h>
#include <asm/hvcall.h>
#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
#include <asm/vas.h>
#include "vas.h"
@@ -26,9 +27,11 @@
static struct vas_all_caps caps_all;
static bool copypaste_feat;
+static struct hv_vas_cop_feat_caps hv_cop_caps;
static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
static DEFINE_MUTEX(vas_pseries_mutex);
+static bool migration_in_progress;
static long hcall_return_busy_check(long rc)
{
@@ -107,7 +110,6 @@ static int h_deallocate_vas_window(u64 winid)
static int h_modify_vas_window(struct pseries_vas_window *win)
{
long rc;
- u32 lpid = mfspr(SPRN_PID);
/*
* AMR value is not supported in Linux VAS implementation.
@@ -115,7 +117,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win)
*/
do {
rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
- win->vas_win.winid, lpid, 0,
+ win->vas_win.winid, win->pid, 0,
VAS_MOD_WIN_FLAGS, 0);
rc = hcall_return_busy_check(rc);
@@ -124,8 +126,8 @@ static int h_modify_vas_window(struct pseries_vas_window *win)
if (rc == H_SUCCESS)
return 0;
- pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
- rc, win->vas_win.winid, lpid);
+ pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
+ rc, win->vas_win.winid, win->pid);
return -EIO;
}
@@ -151,8 +153,15 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
if (rc == H_SUCCESS)
return 0;
- pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
- hcall, rc, query_type, result);
+ /* H_FUNCTION means HV does not support VAS so don't print an error */
+ if (rc != H_FUNCTION) {
+ pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
+ (hcall == H_QUERY_VAS_CAPABILITIES) ?
+ "H_QUERY_VAS_CAPABILITIES" :
+ "H_QUERY_NX_CAPABILITIES",
+ rc, query_type, result);
+ }
+
return -EIO;
}
EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
@@ -191,17 +200,42 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
struct vas_user_win_ref *tsk_ref;
int rc;
- rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
- if (!rc) {
- tsk_ref = &txwin->vas_win.task_ref;
- vas_dump_crb(&crb);
- vas_update_csb(&crb, tsk_ref);
+ while (atomic_read(&txwin->pending_faults)) {
+ rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+ if (!rc) {
+ tsk_ref = &txwin->vas_win.task_ref;
+ vas_dump_crb(&crb);
+ vas_update_csb(&crb, tsk_ref);
+ }
+ atomic_dec(&txwin->pending_faults);
}
return IRQ_HANDLED;
}
/*
+ * irq_default_primary_handler() can be used only with IRQF_ONESHOT
+ * which disables IRQ before executing the thread handler and enables
+ * it after. But this disabling interrupt sets the VAS IRQ OFF
+ * state in the hypervisor. If the NX generates fault interrupt
+ * during this window, the hypervisor will not deliver this
+ * interrupt to the LPAR. So use VAS specific IRQ handler instead
+ * of calling the default primary handler.
+ */
+static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+
+ /*
+ * The thread hanlder will process this interrupt if it is
+ * already running.
+ */
+ atomic_inc(&txwin->pending_faults);
+
+ return IRQ_WAKE_THREAD;
+}
+
+/*
* Allocate window and setup IRQ mapping.
*/
static int allocate_setup_window(struct pseries_vas_window *txwin,
@@ -231,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
goto out_irq;
}
- rc = request_threaded_irq(txwin->fault_virq, NULL,
- pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+ rc = request_threaded_irq(txwin->fault_virq,
+ pseries_vas_irq_handler,
+ pseries_vas_fault_thread_fn, 0,
txwin->name, txwin);
if (rc) {
pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
@@ -303,8 +338,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
cop_feat_caps = &caps->caps;
- if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
- atomic_read(&cop_feat_caps->target_lpar_creds)) {
+ if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
+ atomic_read(&cop_feat_caps->nr_total_credits)) {
pr_err("Credits are not available to allocate window\n");
rc = -EINVAL;
goto out;
@@ -324,13 +359,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
* So no unpacking needs to be done.
*/
rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
- VPHN_FLAG_VCPU, smp_processor_id());
+ VPHN_FLAG_VCPU, hard_smp_processor_id());
if (rc != H_SUCCESS) {
pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
goto out;
}
}
+ txwin->pid = mfspr(SPRN_PID);
+
/*
* Allocate / Deallocate window hcalls and setup / free IRQs
* have to be protected with mutex.
@@ -347,7 +384,10 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
* same fault IRQ is not freed by the OS before.
*/
mutex_lock(&vas_pseries_mutex);
- rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+ if (migration_in_progress)
+ rc = -EBUSY;
+ else
+ rc = allocate_setup_window(txwin, (u64 *)&domain[0],
cop_feat_caps->win_type);
mutex_unlock(&vas_pseries_mutex);
if (rc)
@@ -362,13 +402,28 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
if (rc)
goto out_free;
- vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
txwin->win_type = cop_feat_caps->win_type;
mutex_lock(&vas_pseries_mutex);
- list_add(&txwin->win_list, &caps->list);
+ /*
+ * Possible to lose the acquired credit with DLPAR core
+ * removal after the window is opened. So if there are any
+ * closed windows (means with lost credits), do not give new
+ * window to user space. New windows will be opened only
+ * after the existing windows are reopened when credits are
+ * available.
+ */
+ if (!caps->nr_close_wins) {
+ list_add(&txwin->win_list, &caps->list);
+ caps->nr_open_windows++;
+ mutex_unlock(&vas_pseries_mutex);
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+ return &txwin->vas_win;
+ }
mutex_unlock(&vas_pseries_mutex);
- return &txwin->vas_win;
+ put_vas_user_win_ref(&txwin->vas_win.task_ref);
+ rc = -EBUSY;
+ pr_err("No credit is available to allocate window\n");
out_free:
/*
@@ -378,7 +433,7 @@ out_free:
free_irq_setup(txwin);
h_deallocate_vas_window(txwin->vas_win.winid);
out:
- atomic_dec(&cop_feat_caps->used_lpar_creds);
+ atomic_dec(&cop_feat_caps->nr_used_credits);
kfree(txwin);
return ERR_PTR(rc);
}
@@ -431,14 +486,25 @@ static int vas_deallocate_window(struct vas_window *vwin)
caps = &vascaps[win->win_type].caps;
mutex_lock(&vas_pseries_mutex);
- rc = deallocate_free_window(win);
- if (rc) {
- mutex_unlock(&vas_pseries_mutex);
- return rc;
- }
+ /*
+ * VAS window is already closed in the hypervisor when
+ * lost the credit or with migration. So just remove the entry
+ * from the list, remove task references and free vas_window
+ * struct.
+ */
+ if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
+ !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
+ rc = deallocate_free_window(win);
+ if (rc) {
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+ }
+ } else
+ vascaps[win->win_type].nr_close_wins--;
list_del(&win->win_list);
- atomic_dec(&caps->used_lpar_creds);
+ atomic_dec(&caps->nr_used_credits);
+ vascaps[win->win_type].nr_open_windows--;
mutex_unlock(&vas_pseries_mutex);
put_vas_user_win_ref(&vwin->task_ref);
@@ -461,14 +527,10 @@ static const struct vas_user_win_ops vops_pseries = {
int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
const char *name)
{
- int rc;
-
if (!copypaste_feat)
return -ENOTSUPP;
- rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
-
- return rc;
+ return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
}
EXPORT_SYMBOL_GPL(vas_register_api_pseries);
@@ -482,7 +544,7 @@ EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
* Get the specific capabilities based on the feature type.
* Right now supports GZIP default and GZIP QoS capabilities.
*/
-static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
struct hv_vas_cop_feat_caps *hv_caps)
{
struct vas_cop_feat_caps *caps;
@@ -493,6 +555,7 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
memset(vcaps, 0, sizeof(*vcaps));
INIT_LIST_HEAD(&vcaps->list);
+ vcaps->feat = feat;
caps = &vcaps->caps;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
@@ -514,7 +577,7 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
}
caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
- atomic_set(&caps->target_lpar_creds,
+ atomic_set(&caps->nr_total_credits,
be16_to_cpu(hv_caps->target_lpar_creds));
if (feat == VAS_GZIP_DEF_FEAT) {
caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
@@ -526,16 +589,425 @@ static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
}
}
+ rc = sysfs_add_vas_caps(caps);
+ if (rc)
+ return rc;
+
copypaste_feat = true;
return 0;
}
+/*
+ * VAS windows can be closed due to lost credits when the core is
+ * removed. So reopen them if credits are available due to DLPAR
+ * core add and set the window active status. When NX sees the page
+ * fault on the unmapped paste address, the kernel handles the fault
+ * by setting the remapping to new paste address if the window is
+ * active.
+ */
+static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
+ bool migrate)
+{
+ long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+ struct vas_cop_feat_caps *caps = &vcaps->caps;
+ struct pseries_vas_window *win = NULL, *tmp;
+ int rc, mv_ents = 0;
+ int flag;
+
+ /*
+ * Nothing to do if there are no closed windows.
+ */
+ if (!vcaps->nr_close_wins)
+ return 0;
+
+ /*
+ * For the core removal, the hypervisor reduces the credits
+ * assigned to the LPAR and the kernel closes VAS windows
+ * in the hypervisor depends on reduced credits. The kernel
+ * uses LIFO (the last windows that are opened will be closed
+ * first) and expects to open in the same order when credits
+ * are available.
+ * For example, 40 windows are closed when the LPAR lost 2 cores
+ * (dedicated). If 1 core is added, this LPAR can have 20 more
+ * credits. It means the kernel can reopen 20 windows. So move
+ * 20 entries in the VAS windows lost and reopen next 20 windows.
+ * For partition migration, reopen all windows that are closed
+ * during resume.
+ */
+ if ((vcaps->nr_close_wins > creds) && !migrate)
+ mv_ents = vcaps->nr_close_wins - creds;
+
+ list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
+ if (!mv_ents)
+ break;
+
+ mv_ents--;
+ }
+
+ /*
+ * Open windows if they are closed only with migration or
+ * DLPAR (lost credit) before.
+ */
+ if (migrate)
+ flag = VAS_WIN_MIGRATE_CLOSE;
+ else
+ flag = VAS_WIN_NO_CRED_CLOSE;
+
+ list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
+ /*
+ * This window is closed with DLPAR and migration events.
+ * So reopen the window with the last event.
+ * The user space is not suspended with the current
+ * migration notifier. So the user space can issue DLPAR
+ * CPU hotplug while migration in progress. In this case
+ * this window will be opened with the last event.
+ */
+ if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
+ (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
+ win->vas_win.status &= ~flag;
+ continue;
+ }
+
+ /*
+ * Nothing to do on this window if it is not closed
+ * with this flag
+ */
+ if (!(win->vas_win.status & flag))
+ continue;
+
+ rc = allocate_setup_window(win, (u64 *)&domain[0],
+ caps->win_type);
+ if (rc)
+ return rc;
+
+ rc = h_modify_vas_window(win);
+ if (rc)
+ goto out;
+
+ mutex_lock(&win->vas_win.task_ref.mmap_mutex);
+ /*
+ * Set window status to active
+ */
+ win->vas_win.status &= ~flag;
+ mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
+ win->win_type = caps->win_type;
+ if (!--vcaps->nr_close_wins)
+ break;
+ }
+
+ return 0;
+out:
+ /*
+ * Window modify HCALL failed. So close the window to the
+ * hypervisor and return.
+ */
+ free_irq_setup(win);
+ h_deallocate_vas_window(win->vas_win.winid);
+ return rc;
+}
+
+/*
+ * The hypervisor reduces the available credits if the LPAR lost core. It
+ * means the excessive windows should not be active and the user space
+ * should not be using these windows to send compression requests to NX.
+ * So the kernel closes the excessive windows and unmap the paste address
+ * such that the user space receives paste instruction failure. Then up to
+ * the user space to fall back to SW compression and manage with the
+ * existing windows.
+ */
+static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
+ bool migrate)
+{
+ struct pseries_vas_window *win, *tmp;
+ struct vas_user_win_ref *task_ref;
+ struct vm_area_struct *vma;
+ int rc = 0, flag;
+
+ if (migrate)
+ flag = VAS_WIN_MIGRATE_CLOSE;
+ else
+ flag = VAS_WIN_NO_CRED_CLOSE;
+
+ list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
+ /*
+ * This window is already closed due to lost credit
+ * or for migration before. Go for next window.
+ * For migration, nothing to do since this window
+ * closed for DLPAR and will be reopened even on
+ * the destination system with other DLPAR operation.
+ */
+ if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
+ (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
+ win->vas_win.status |= flag;
+ continue;
+ }
+
+ task_ref = &win->vas_win.task_ref;
+ mutex_lock(&task_ref->mmap_mutex);
+ vma = task_ref->vma;
+ /*
+ * Number of available credits are reduced, So select
+ * and close windows.
+ */
+ win->vas_win.status |= flag;
+
+ mmap_write_lock(task_ref->mm);
+ /*
+ * vma is set in the original mapping. But this mapping
+ * is done with mmap() after the window is opened with ioctl.
+ * so we may not see the original mapping if the core remove
+ * is done before the original mmap() and after the ioctl.
+ */
+ if (vma)
+ zap_page_range(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
+
+ mmap_write_unlock(task_ref->mm);
+ mutex_unlock(&task_ref->mmap_mutex);
+ /*
+ * Close VAS window in the hypervisor, but do not
+ * free vas_window struct since it may be reused
+ * when the credit is available later (DLPAR with
+ * adding cores). This struct will be used
+ * later when the process issued with close(FD).
+ */
+ rc = deallocate_free_window(win);
+ /*
+ * This failure is from the hypervisor.
+ * No way to stop migration for these failures.
+ * So ignore error and continue closing other windows.
+ */
+ if (rc && !migrate)
+ return rc;
+
+ vcap->nr_close_wins++;
+
+ /*
+ * For migration, do not depend on lpar_creds in case if
+ * mismatch with the hypervisor value (should not happen).
+ * So close all active windows in the list and will be
+ * reopened windows based on the new lpar_creds on the
+ * destination system during resume.
+ */
+ if (!migrate && !--excess_creds)
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Get new VAS capabilities when the core add/removal configuration
+ * changes. Reconfig window configurations based on the credits
+ * availability from this new capabilities.
+ */
+int vas_reconfig_capabilties(u8 type, int new_nr_creds)
+{
+ struct vas_cop_feat_caps *caps;
+ int old_nr_creds;
+ struct vas_caps *vcaps;
+ int rc = 0, nr_active_wins;
+
+ if (type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Invalid credit type %d\n", type);
+ return -EINVAL;
+ }
+
+ vcaps = &vascaps[type];
+ caps = &vcaps->caps;
+
+ mutex_lock(&vas_pseries_mutex);
+
+ old_nr_creds = atomic_read(&caps->nr_total_credits);
+
+ atomic_set(&caps->nr_total_credits, new_nr_creds);
+ /*
+ * The total number of available credits may be decreased or
+ * increased with DLPAR operation. Means some windows have to be
+ * closed / reopened. Hold the vas_pseries_mutex so that the
+ * user space can not open new windows.
+ */
+ if (old_nr_creds < new_nr_creds) {
+ /*
+ * If the existing target credits is less than the new
+ * target, reopen windows if they are closed due to
+ * the previous DLPAR (core removal).
+ */
+ rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
+ false);
+ } else {
+ /*
+ * # active windows is more than new LPAR available
+ * credits. So close the excessive windows.
+ * On pseries, each window will have 1 credit.
+ */
+ nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
+ if (nr_active_wins > new_nr_creds)
+ rc = reconfig_close_windows(vcaps,
+ nr_active_wins - new_nr_creds,
+ false);
+ }
+
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+}
+
+int pseries_vas_dlpar_cpu(void)
+{
+ int new_nr_creds, rc;
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
+ }
+
+ if (rc)
+ pr_err("Failed reconfig VAS capabilities with DLPAR\n");
+
+ return rc;
+}
+
+/*
+ * Total number of default credits available (target_credits)
+ * in LPAR depends on number of cores configured. It varies based on
+ * whether processors are in shared mode or dedicated mode.
+ * Get the notifier when CPU configuration is changed with DLPAR
+ * operation so that get the new target_credits (vas default capabilities)
+ * and then update the existing windows usage if needed.
+ */
+static int pseries_vas_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct of_reconfig_data *rd = data;
+ struct device_node *dn = rd->dn;
+ const __be32 *intserv = NULL;
+ int len;
+
+ /*
+ * For shared CPU partition, the hypervisor assigns total credits
+ * based on entitled core capacity. So updating VAS windows will
+ * be called from lparcfg_write().
+ */
+ if (is_shared_processor())
+ return NOTIFY_OK;
+
+ if ((action == OF_RECONFIG_ATTACH_NODE) ||
+ (action == OF_RECONFIG_DETACH_NODE))
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
+ &len);
+ /*
+ * Processor config is not changed
+ */
+ if (!intserv)
+ return NOTIFY_OK;
+
+ return pseries_vas_dlpar_cpu();
+}
+
+static struct notifier_block pseries_vas_nb = {
+ .notifier_call = pseries_vas_notifier,
+};
+
+/*
+ * For LPM, all windows have to be closed on the source partition
+ * before migration and reopen them on the destination partition
+ * after migration. So closing windows during suspend and
+ * reopen them during resume.
+ */
+int vas_migration_handler(int action)
+{
+ struct vas_cop_feat_caps *caps;
+ int old_nr_creds, new_nr_creds = 0;
+ struct vas_caps *vcaps;
+ int i, rc = 0;
+
+ /*
+ * NX-GZIP is not enabled. Nothing to do for migration.
+ */
+ if (!copypaste_feat)
+ return rc;
+
+ mutex_lock(&vas_pseries_mutex);
+
+ if (action == VAS_SUSPEND)
+ migration_in_progress = true;
+ else
+ migration_in_progress = false;
+
+ for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
+ vcaps = &vascaps[i];
+ caps = &vcaps->caps;
+ old_nr_creds = atomic_read(&caps->nr_total_credits);
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vcaps->feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ /*
+ * Should not happen. But incase print messages, close
+ * all windows in the list during suspend and reopen
+ * windows based on new lpar_creds on the destination
+ * system.
+ */
+ if (old_nr_creds != new_nr_creds) {
+ pr_err("Target credits mismatch with the hypervisor\n");
+ pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
+ action, old_nr_creds, new_nr_creds);
+ pr_err("Used creds: %d, Active creds: %d\n",
+ atomic_read(&caps->nr_used_credits),
+ vcaps->nr_open_windows - vcaps->nr_close_wins);
+ }
+ } else {
+ pr_err("state(%d): Get VAS capabilities failed with %d\n",
+ action, rc);
+ /*
+ * We can not stop migration with the current lpm
+ * implementation. So continue closing all windows in
+ * the list (during suspend) and return without
+ * opening windows (during resume) if VAS capabilities
+ * HCALL failed.
+ */
+ if (action == VAS_RESUME)
+ goto out;
+ }
+
+ switch (action) {
+ case VAS_SUSPEND:
+ rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
+ true);
+ break;
+ case VAS_RESUME:
+ atomic_set(&caps->nr_total_credits, new_nr_creds);
+ rc = reconfig_open_windows(vcaps, new_nr_creds, true);
+ break;
+ default:
+ /* should not happen */
+ pr_err("Invalid migration action %d\n", action);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Ignore errors during suspend and return for resume.
+ */
+ if (rc && (action == VAS_RESUME))
+ goto out;
+ }
+
+out:
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+}
+
static int __init pseries_vas_init(void)
{
- struct hv_vas_cop_feat_caps *hv_cop_caps;
struct hv_vas_all_caps *hv_caps;
- int rc;
+ int rc = 0;
/*
* Linux supports user space COPY/PASTE only with Radix
@@ -559,35 +1031,39 @@ static int __init pseries_vas_init(void)
caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
- hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
- if (!hv_cop_caps) {
- rc = -ENOMEM;
- goto out;
- }
+ sysfs_pseries_vas_init(&caps_all);
+
/*
* QOS capabilities available
*/
if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
- VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+ VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
if (rc)
- goto out_cop;
+ goto out;
}
/*
* Default capabilities available
*/
- if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+ if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
- VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
- if (rc)
- goto out_cop;
- }
+ VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
+
+ if (!rc && copypaste_feat) {
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ of_reconfig_notifier_register(&pseries_vas_nb);
- pr_info("GZIP feature is available\n");
+ pr_info("GZIP feature is available\n");
+ } else {
+ /*
+ * Should not happen, but only when get default
+ * capabilities HCALL failed. So disable copy paste
+ * feature.
+ */
+ copypaste_feat = false;
+ }
-out_cop:
- kfree(hv_cop_caps);
out:
kfree(hv_caps);
return rc;
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 4ecb3fcabd10..7115043ec488 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -30,6 +30,14 @@
#define VAS_COPY_PASTE_USER_MODE 0x00000001
#define VAS_COP_OP_USER_MODE 0x00000010
+#define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970
+#define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970
+
+enum vas_migrate_action {
+ VAS_SUSPEND,
+ VAS_RESUME,
+};
+
/*
* Co-processor feature - GZIP QoS windows or GZIP default windows
*/
@@ -72,9 +80,8 @@ struct vas_cop_feat_caps {
};
/* Total LPAR available credits. Can be different from max LPAR */
/* credits due to DLPAR operation */
- atomic_t target_lpar_creds;
- atomic_t used_lpar_creds; /* Used credits so far */
- u16 avail_lpar_creds; /* Remaining available credits */
+ atomic_t nr_total_credits; /* Total credits assigned to LPAR */
+ atomic_t nr_used_credits; /* Used credits so far */
};
/*
@@ -84,6 +91,9 @@ struct vas_cop_feat_caps {
struct vas_caps {
struct vas_cop_feat_caps caps;
struct list_head list; /* List of open windows */
+ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */
+ int nr_open_windows; /* Number of successful open windows */
+ u8 feat; /* Feature type */
};
/*
@@ -115,11 +125,31 @@ struct pseries_vas_window {
u64 domain[6]; /* Associativity domain Ids */
/* this window is allocated */
u64 util;
+ u32 pid; /* PID associated with this window */
/* List of windows opened which is used for LPM */
struct list_head win_list;
u64 flags;
char *name;
int fault_virq;
+ atomic_t pending_faults; /* Number of pending faults */
};
+
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
+int vas_reconfig_capabilties(u8 type, int new_nr_creds);
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
+
+#ifdef CONFIG_PPC_VAS
+int vas_migration_handler(int action);
+int pseries_vas_dlpar_cpu(void);
+#else
+static inline int vas_migration_handler(int action)
+{
+ return 0;
+}
+static inline int pseries_vas_dlpar_cpu(void)
+{
+ return 0;
+}
+#endif
#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index feafcb582e1b..00ecac2c205b 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -23,6 +23,7 @@
#include <linux/dma-map-ops.h>
#include <linux/kobject.h>
#include <linux/kexec.h>
+#include <linux/of_irq.h>
#include <asm/iommu.h>
#include <asm/dma.h>
@@ -1061,7 +1062,7 @@ static struct attribute *vio_bus_attrs[] = {
};
ATTRIBUTE_GROUPS(vio_bus);
-static void vio_cmo_sysfs_init(void)
+static void __init vio_cmo_sysfs_init(void)
{
vio_bus_type.dev_groups = vio_cmo_dev_groups;
vio_bus_type.bus_groups = vio_bus_groups;
@@ -1073,7 +1074,7 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
static void vio_cmo_bus_init(void) {}
-static void vio_cmo_sysfs_init(void) { }
+static void __init vio_cmo_sysfs_init(void) { }
#endif /* CONFIG_PPC_SMLPAR */
EXPORT_SYMBOL(vio_cmo_entitlement_update);
EXPORT_SYMBOL(vio_cmo_set_dev_desired);
@@ -1479,7 +1480,7 @@ EXPORT_SYMBOL(vio_register_device_node);
* Starting from the root node provide, register the device node for
* each child beneath the root.
*/
-static void vio_bus_scan_register_devices(char *root_name)
+static void __init vio_bus_scan_register_devices(char *root_name)
{
struct device_node *node_root, *node_child;