Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile           |   1
-rw-r--r--  arch/powerpc/platforms/pseries/dlpar.c            |  19
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c      |  27
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c   |  92
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall.S           |  39
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall_inst.c      |   2
-rw-r--r--  arch/powerpc/platforms/pseries/ibmebus.c          |   1
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c            |  49
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c             |  38
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c          |   2
-rw-r--r--  arch/powerpc/platforms/pseries/mobility.c         |  48
-rw-r--r--  arch/powerpc/platforms/pseries/msi.c              |  25
-rw-r--r--  arch/powerpc/platforms/pseries/papr_scm.c         | 149
-rw-r--r--  arch/powerpc/platforms/pseries/pci.c              |  23
-rw-r--r--  arch/powerpc/platforms/pseries/pci_dlpar.c        |   4
-rw-r--r--  arch/powerpc/platforms/pseries/pmem.c             |   2
-rw-r--r--  arch/powerpc/platforms/pseries/pseries.h          |   5
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c              |   8
-rw-r--r--  arch/powerpc/platforms/pseries/rtas-fadump.c      |   2
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c            |  17
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c              |   4
-rw-r--r--  arch/powerpc/platforms/pseries/svm.c              |   6
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c              | 595
-rw-r--r--  arch/powerpc/platforms/pseries/vas.h              | 125
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c              |  27
25 files changed, 1170 insertions(+), 140 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index c8a2b0b05ac0..4cda0ef87be0 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM) += svm.o
obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
obj-$(CONFIG_SUSPEND) += suspend.o
+obj-$(CONFIG_PPC_VAS) += vas.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 233503fcf8f0..b1f01ac0c29e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index)
{
int dr_status, rc;
- rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
- DR_ENTITY_SENSE, drc_index);
+ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
if (rc || dr_status != DR_ENTITY_UNUSABLE)
return -1;
@@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index)
{
int dr_status, rc;
- rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
- DR_ENTITY_SENSE, drc_index);
+ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
if (rc || dr_status != DR_ENTITY_PRESENT)
return -1;
@@ -329,6 +327,19 @@ int dlpar_release_drc(u32 drc_index)
return 0;
}
+int dlpar_unisolate_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
+ if (rc || dr_status != DR_ENTITY_PRESENT)
+ return -1;
+
+ rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+
+ return 0;
+}
+
int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
{
int rc;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 12cbffd3c2e3..7e970f81d8ff 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -47,9 +47,6 @@ static void rtas_stop_self(void)
BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
- printk("cpu %u (hwid %u) Ready to die...\n",
- smp_processor_id(), hard_smp_processor_id());
-
rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);
panic("Alas, I survived.\n");
@@ -271,6 +268,19 @@ static int dlpar_offline_cpu(struct device_node *dn)
if (!cpu_online(cpu))
break;
+ /*
+ * device_offline() will return -EBUSY (via cpu_down()) if there
+ * is only one CPU left. Check it here to fail earlier and with a
+ * more informative error message, while also retaining the
+ * cpu_add_remove_lock to be sure that no CPUs are being
+ * online/offlined during this check.
+ */
+ if (num_online_cpus() == 1) {
+ pr_warn("Unable to remove last online CPU %pOFn\n", dn);
+ rc = -EBUSY;
+ goto out_unlock;
+ }
+
cpu_maps_update_done();
rc = device_offline(get_cpu_device(cpu));
if (rc)
@@ -283,6 +293,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
thread);
}
}
+out_unlock:
cpu_maps_update_done();
out:
@@ -802,8 +813,16 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
case PSERIES_HP_ELOG_ACTION_REMOVE:
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
rc = dlpar_cpu_remove_by_count(count);
- else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+ else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
rc = dlpar_cpu_remove_by_index(drc_index);
+ /*
+ * Setting the isolation state of an UNISOLATED/CONFIGURED
+ * device to UNISOLATE is a no-op, but the hypervisor can
+ * use it as a hint that the CPU removal failed.
+ */
+ if (rc)
+ dlpar_unisolate_drc(drc_index);
+ }
else
rc = -EINVAL;
break;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 8377f1f7c78e..377d852f5a9a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np)
static bool lmb_is_removable(struct drmem_lmb *lmb)
{
- if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+ if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+ !(lmb->flags & DRCONF_MEM_ASSIGNED))
return false;
#ifdef CONFIG_FA_DUMP
@@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
{
struct drmem_lmb *lmb;
- int lmbs_removed = 0;
+ int lmbs_reserved = 0;
int lmbs_available = 0;
int rc;
@@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
*/
drmem_mark_lmb_reserved(lmb);
- lmbs_removed++;
- if (lmbs_removed == lmbs_to_remove)
+ lmbs_reserved++;
+ if (lmbs_reserved == lmbs_to_remove)
break;
}
- if (lmbs_removed != lmbs_to_remove) {
+ if (lmbs_reserved != lmbs_to_remove) {
pr_err("Memory hot-remove failed, adding LMB's back\n");
for_each_drmem_lmb(lmb) {
@@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
lmb->drc_index);
drmem_remove_lmb_reservation(lmb);
+
+ lmbs_reserved--;
+ if (lmbs_reserved == 0)
+ break;
}
rc = -EINVAL;
@@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
lmb->base_addr);
drmem_remove_lmb_reservation(lmb);
+
+ lmbs_reserved--;
+ if (lmbs_reserved == 0)
+ break;
}
rc = 0;
}
@@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
{
struct drmem_lmb *lmb, *start_lmb, *end_lmb;
- int lmbs_available = 0;
int rc;
pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
@@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
if (rc)
return -EINVAL;
- /* Validate that there are enough LMBs to satisfy the request */
+ /*
+ * Validate that all LMBs in range are not reserved. Note that it
+ * is ok if they are !ASSIGNED since our goal here is to remove the
+ * LMB range, regardless of whether some LMBs were already removed
+ * by any other reason.
+ *
+ * This is a contrast to what is done in remove_by_count() where we
+ * check for both RESERVED and !ASSIGNED (via lmb_is_removable()),
+ * because we want to remove a fixed amount of LMBs in that function.
+ */
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
- if (lmb->flags & DRCONF_MEM_RESERVED)
- break;
-
- lmbs_available++;
+ if (lmb->flags & DRCONF_MEM_RESERVED) {
+ pr_err("Memory at %llx (drc index %x) is reserved\n",
+ lmb->base_addr, lmb->drc_index);
+ return -EINVAL;
+ }
}
- if (lmbs_available < lmbs_to_remove)
- return -EINVAL;
-
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ /*
+ * dlpar_remove_lmb() will error out if the LMB is already
+ * !ASSIGNED, but this case is a no-op for us.
+ */
if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;
@@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
if (!drmem_lmb_reserved(lmb))
continue;
+ /*
+ * Setting the isolation state of an UNISOLATED/CONFIGURED
+ * device to UNISOLATE is a no-op, but the hypervisor can
+ * use it as a hint that the LMB removal failed.
+ */
+ dlpar_unisolate_drc(lmb->drc_index);
+
rc = dlpar_add_lmb(lmb);
if (rc)
pr_err("Failed to add LMB, drc index %x\n",
@@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np)
{
return 0;
}
-static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
-{
- return -EOPNOTSUPP;
-}
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
return -EOPNOTSUPP;
@@ -651,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
{
struct drmem_lmb *lmb;
int lmbs_available = 0;
- int lmbs_added = 0;
+ int lmbs_reserved = 0;
int rc;
pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
@@ -661,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
/* Validate that there are enough LMBs to satisfy the request */
for_each_drmem_lmb(lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED)
+ continue;
+
if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
lmbs_available++;
@@ -689,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
* requested LMBs cannot be added.
*/
drmem_mark_lmb_reserved(lmb);
-
- lmbs_added++;
- if (lmbs_added == lmbs_to_add)
+ lmbs_reserved++;
+ if (lmbs_reserved == lmbs_to_add)
break;
}
- if (lmbs_added != lmbs_to_add) {
+ if (lmbs_reserved != lmbs_to_add) {
pr_err("Memory hot-add failed, removing any added LMBs\n");
for_each_drmem_lmb(lmb) {
@@ -710,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
dlpar_release_drc(lmb->drc_index);
drmem_remove_lmb_reservation(lmb);
+ lmbs_reserved--;
+
+ if (lmbs_reserved == 0)
+ break;
}
rc = -EINVAL;
} else {
@@ -720,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
pr_debug("Memory at %llx (drc index %x) was hot-added\n",
lmb->base_addr, lmb->drc_index);
drmem_remove_lmb_reservation(lmb);
+ lmbs_reserved--;
+
+ if (lmbs_reserved == 0)
+ break;
}
rc = 0;
}
@@ -764,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index)
static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
{
struct drmem_lmb *lmb, *start_lmb, *end_lmb;
- int lmbs_available = 0;
int rc;
pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
@@ -779,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
/* Validate that the LMBs in this range are not reserved */
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
- if (lmb->flags & DRCONF_MEM_RESERVED)
- break;
-
- lmbs_available++;
+ /* Fail immediately if the whole range can't be hot-added */
+ if (lmb->flags & DRCONF_MEM_RESERVED) {
+ pr_err("Memory at %llx (drc index %x) is reserved\n",
+ lmb->base_addr, lmb->drc_index);
+ return -EINVAL;
+ }
}
- if (lmbs_available < lmbs_to_add)
- return -EINVAL;
-
for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
if (lmb->flags & DRCONF_MEM_ASSIGNED)
continue;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 2136e42833af..ab9fc6506861 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -102,6 +102,20 @@ END_FTR_SECTION(0, 1); \
#define HCALL_BRANCH(LABEL)
#endif
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+ HVSC /* invoke the hypervisor */
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+ blr /* return r3 = status */
+
_GLOBAL_TOC(plpar_hcall_norets)
HMT_MEDIUM
@@ -110,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets)
HCALL_BRANCH(plpar_hcall_norets_trace)
HVSC /* invoke the hypervisor */
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
blr /* return r3 = status */
@@ -119,6 +136,10 @@ plpar_hcall_norets_trace:
HCALL_INST_PRECALL(R4)
HVSC
HCALL_INST_POSTCALL_NORETS
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
blr
@@ -149,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall)
std r6, 16(r12)
std r7, 24(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -178,6 +202,9 @@ plpar_hcall_trace:
HCALL_INST_POSTCALL(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -213,6 +240,9 @@ _GLOBAL(plpar_hcall_raw)
std r6, 16(r12)
std r7, 24(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -252,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9)
std r11,56(r12)
std r0, 64(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -290,6 +323,9 @@ plpar_hcall9_trace:
HCALL_INST_POSTCALL(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
@@ -329,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw)
std r11,56(r12)
std r0, 64(r12)
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
lwz r0,8(r1)
mtcrf 0xff,r0
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 2c59b4986ea5..3a50612a78db 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,7 +26,7 @@ struct hcall_stats {
};
#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
-DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
/*
* Routines for displaying the statistics in debugfs
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index a15ab33646b3..c6c79ef55e13 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -42,6 +42,7 @@
#include <linux/kobject.h>
#include <linux/dma-map-ops.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/stat.h>
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 9fc5217f0c8e..0c55b991f665 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -638,7 +638,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
iommu_table_setparms(pci->phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, pci->phb->node, 0, 0);
+ if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
+ panic("Failed to initialize iommu table");
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -720,7 +721,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
ppci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, ppci->phb->node, 0, 0);
+ if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+ panic("Failed to initialize iommu table");
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -749,7 +751,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, phb->node, 0, 0);
+ if (!iommu_init_table(tbl, phb->node, 0, 0))
+ panic("Failed to initialize iommu table");
+
set_iommu_table_base(&dev->dev, tbl);
return;
}
@@ -1099,6 +1103,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
ret);
}
+/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */
+static int iommu_get_page_shift(u32 query_page_size)
+{
+ /* Supported IO page-sizes according to LoPAR */
+ const int shift[] = {
+ __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M),
+ __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M),
+ __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G)
+ };
+
+ int i = ARRAY_SIZE(shift) - 1;
+
+ /*
+ * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field:
+ * - bit 31 means 4k pages are supported,
+ * - bit 30 means 64k pages are supported, and so on.
+ * Larger page sizes map more memory with the same number of TCEs, so start probing with the largest.
+ */
+ for (; i >= 0 ; i--) {
+ if (query_page_size & (1 << i))
+ return shift[i];
+ }
+
+ /* No valid page size found. */
+ return 0;
+}
+
/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
@@ -1206,13 +1237,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
goto out_failed;
}
}
- if (query.page_size & 4) {
- page_shift = 24; /* 16MB */
- } else if (query.page_size & 2) {
- page_shift = 16; /* 64kB */
- } else if (query.page_size & 1) {
- page_shift = 12; /* 4kB */
- } else {
+
+ page_shift = iommu_get_page_shift(query.page_size);
+ if (!page_shift) {
dev_dbg(&dev->dev, "no supported direct page size in mask %x",
query.page_size);
goto out_failed;
@@ -1229,7 +1256,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
if (pmem_present) {
if (query.largest_available_block >=
(1ULL << (MAX_PHYSMEM_BITS - page_shift)))
- len = MAX_PHYSMEM_BITS - page_shift;
+ len = MAX_PHYSMEM_BITS;
else
dev_info(&dev->dev, "Skipping ibm,pmemory");
}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 764170fdb0f7..dab356e3ff87 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -887,7 +887,8 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
want_v = hpte_encode_avpn(vpn, psize, ssize);
- flags = (newpp & 7) | H_AVPN;
+ flags = (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO)) | H_AVPN;
+ flags |= (newpp & HPTE_R_KEY_HI) >> 48;
if (mmu_has_feature(MMU_FTR_KERNEL_RO))
/* Move pp0 into bit 8 (IBM 55) */
flags |= (newpp & HPTE_R_PP0) >> 55;
@@ -976,11 +977,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
BUG_ON(slot == -1);
- flags = newpp & 7;
+ flags = newpp & (HPTE_R_PP | HPTE_R_N);
if (mmu_has_feature(MMU_FTR_KERNEL_RO))
/* Move pp0 into bit 8 (IBM 55) */
flags |= (newpp & HPTE_R_PP0) >> 55;
+ flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO);
+
lpar_rc = plpar_pte_protect(flags, slot, 0);
BUG_ON(lpar_rc != H_SUCCESS);
@@ -1629,7 +1632,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
}
msleep(delay);
rc = plpar_resize_hpt_prepare(0, shift);
- };
+ }
switch (rc) {
case H_SUCCESS:
@@ -1826,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void)
#endif
/*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this is better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
*/
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
-void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
unsigned long flags;
unsigned int *depth;
- /*
- * We cannot call tracepoints inside RCU idle regions which
- * means we must not trace H_CEDE.
- */
- if (opcode == H_CEDE)
- return;
-
local_irq_save(flags);
depth = this_cpu_ptr(&hcall_trace_depth);
- if (*depth)
+ if (WARN_ON_ONCE(*depth))
goto out;
(*depth)++;
@@ -1861,19 +1862,16 @@ out:
local_irq_restore(flags);
}
-void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
{
unsigned long flags;
unsigned int *depth;
- if (opcode == H_CEDE)
- return;
-
local_irq_save(flags);
depth = this_cpu_ptr(&hcall_trace_depth);
- if (*depth)
+ if (*depth) /* Don't warn again on the way out */
goto out;
(*depth)++;
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index e278390ab28d..f71eac74ea92 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -537,6 +537,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
parse_em_data(m);
maxmem_data(m);
+ seq_printf(m, "security_flavor=%u\n", pseries_security_flavor);
+
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index ea4d6a660e0d..e83e0891272d 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -452,12 +452,28 @@ static int do_suspend(void)
return ret;
}
+/**
+ * struct pseries_suspend_info - State shared between CPUs for join/suspend.
+ * @counter: Threads are to increment this upon resuming from suspend
+ * or if an error is received from H_JOIN. The thread which performs
+ * the first increment (i.e. sets it to 1) is responsible for
+ * waking the other threads.
+ * @done: False if join/suspend is in progress. True if the operation is
+ * complete (successful or not).
+ */
+struct pseries_suspend_info {
+ atomic_t counter;
+ bool done;
+};
+
static int do_join(void *arg)
{
- atomic_t *counter = arg;
+ struct pseries_suspend_info *info = arg;
+ atomic_t *counter = &info->counter;
long hvrc;
int ret;
+retry:
/* Must ensure MSR.EE off for H_JOIN. */
hard_irq_disable();
hvrc = plpar_hcall_norets(H_JOIN);
@@ -473,8 +489,20 @@ static int do_join(void *arg)
case H_SUCCESS:
/*
* The suspend is complete and this cpu has received a
- * prod.
+ * prod, or we've received a stray prod from unrelated
+ * code (e.g. paravirt spinlocks) and we need to join
+ * again.
+ *
+ * This barrier orders the return from H_JOIN above vs
+ * the load of info->done. It pairs with the barrier
+ * in the wakeup/prod path below.
*/
+ smp_mb();
+ if (READ_ONCE(info->done) == false) {
+ pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying",
+ smp_processor_id());
+ goto retry;
+ }
ret = 0;
break;
case H_BAD_MODE:
@@ -488,6 +516,13 @@ static int do_join(void *arg)
if (atomic_inc_return(counter) == 1) {
pr_info("CPU %u waking all threads\n", smp_processor_id());
+ WRITE_ONCE(info->done, true);
+ /*
+ * This barrier orders the store to info->done vs subsequent
+ * H_PRODs to wake the other CPUs. It pairs with the barrier
+ * in the H_SUCCESS case above.
+ */
+ smp_mb();
prod_others();
}
/*
@@ -535,11 +570,16 @@ static int pseries_suspend(u64 handle)
int ret;
while (true) {
- atomic_t counter = ATOMIC_INIT(0);
+ struct pseries_suspend_info info;
unsigned long vasi_state;
int vasi_err;
- ret = stop_machine(do_join, &counter, cpu_online_mask);
+ info = (struct pseries_suspend_info) {
+ .counter = ATOMIC_INIT(0),
+ .done = false,
+ };
+
+ ret = stop_machine(do_join, &info, cpu_online_mask);
if (ret == 0)
break;
/*
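The pairing of the two smp_mb() calls in do_join() above can be illustrated with a roughly analogous standalone C11 sketch (illustrative only, not part of the patch): the waker publishes the done flag before prodding the other threads, and a woken thread re-checks the flag after H_JOIN returns so that a stray prod sends it back to sleep.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool done;	/* stands in for info->done */

/* Waker side: set "done" before waking the other threads (H_PROD). */
static void waker_side(void)
{
	atomic_store_explicit(&done, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* pairs with fence below */
	/* ... prod_others() would go here ... */
}

/* Sleeper side: after H_JOIN returns H_SUCCESS, decide whether the prod
 * was genuine or stray; a stray prod means the caller must H_JOIN again. */
static bool should_rejoin(void)
{
	atomic_thread_fence(memory_order_seq_cst);	/* pairs with fence above */
	return !atomic_load_explicit(&done, memory_order_relaxed);
}

int main(void)
{
	waker_side();
	return should_rejoin();	/* 0 here, since "done" was already published */
}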
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index b3ac2455faad..637300330507 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -4,6 +4,7 @@
* Copyright 2006-2007 Michael Ellerman, IBM Corp.
*/
+#include <linux/crash_dump.h>
#include <linux/device.h>
#include <linux/irq.h>
#include <linux/msi.h>
@@ -458,8 +459,28 @@ again:
return hwirq;
}
- virq = irq_create_mapping_affinity(NULL, hwirq,
- entry->affinity);
+ /*
+ * Depending on the number of online CPUs in the original
+ * kernel, it is likely for CPU #0 to be offline in a kdump
+ * kernel. The associated IRQs in the affinity mappings
+ * provided by irq_create_affinity_masks() are thus not
+ * started by irq_startup(), as per-design for managed IRQs.
+ * This can be a problem with multi-queue block devices driven
+ * by blk-mq : such a non-started IRQ is very likely paired
+ * with the single queue enforced by blk-mq during kdump (see
+ * blk_mq_alloc_tag_set()). This causes the device to remain
+ * silent and likely hangs the guest at some point.
+ *
+ * We don't really care about fine-grained affinity when doing
+ * kdump: simply ignore the pre-computed affinity
+ * masks in this case and let the default mask with all CPUs
+ * be used when creating the IRQ mappings.
+ */
+ if (is_kdump_kernel())
+ virq = irq_create_mapping(NULL, hwirq);
+ else
+ virq = irq_create_mapping_affinity(NULL, hwirq,
+ entry->affinity);
if (!virq) {
pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 835163f54244..f48e87ac89c9 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -18,6 +18,7 @@
#include <asm/plpar_wrappers.h>
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
+#include <asm/unaligned.h>
#define BIND_ANY_ADDR (~0ul)
@@ -93,6 +94,7 @@ struct papr_scm_priv {
uint64_t block_size;
int metadata_size;
bool is_volatile;
+ bool hcall_flush_required;
uint64_t bound_addr;
@@ -113,10 +115,45 @@ struct papr_scm_priv {
/* Health information for the dimm */
u64 health_bitmap;
+ /* Holds the last known dirty shutdown counter value */
+ u64 dirty_shutdown_counter;
+
/* length of the stat buffer as expected by phyp */
size_t stat_buffer_len;
};
+static int papr_scm_pmem_flush(struct nd_region *nd_region,
+ struct bio *bio __maybe_unused)
+{
+ struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+ unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0;
+ long rc;
+
+ dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
+
+ do {
+ rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
+ token = ret_buf[0];
+
+ /* Check if we are stalled for some time */
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ } else if (rc == H_BUSY) {
+ cond_resched();
+ }
+ } while (rc == H_BUSY);
+
+ if (rc) {
+ dev_err(&p->pdev->dev, "flush error: %ld", rc);
+ rc = -EIO;
+ } else {
+ dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
+ }
+
+ return rc;
+}
+
static LIST_HEAD(papr_nd_regions);
static DEFINE_MUTEX(papr_ndr_lock);
@@ -227,7 +264,7 @@ err_out:
* Query the Dimm performance stats from PHYP and copy them (if returned) to
* provided struct papr_scm_perf_stats instance 'stats' that can hold atleast
* (num_stats + header) bytes.
- * - If buff_stats == NULL the return value is the size in byes of the buffer
+ * - If buff_stats == NULL the return value is the size in bytes of the buffer
* needed to hold all supported performance-statistics.
* - If buff_stats != NULL and num_stats == 0 then we copy all known
* performance-statistics to 'buff_stat' and expect to be large enough to
@@ -277,6 +314,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
dev_err(&p->pdev->dev,
"Unknown performance stats, Err:0x%016lX\n", ret[0]);
return -ENOENT;
+ } else if (rc == H_AUTHORITY) {
+ dev_info(&p->pdev->dev,
+ "Permission denied while accessing performance stats");
+ return -EPERM;
+ } else if (rc == H_UNSUPPORTED) {
+ dev_dbg(&p->pdev->dev, "Performance stats unsupported\n");
+ return -EOPNOTSUPP;
} else if (rc != H_SUCCESS) {
dev_err(&p->pdev->dev,
"Failed to query performance stats, Err:%lld\n", rc);
@@ -563,6 +607,16 @@ free_stats:
return rc;
}
+/* Add the dirty-shutdown-counter value to the pdsm */
+static int papr_pdsm_dsc(struct papr_scm_priv *p,
+ union nd_pdsm_payload *payload)
+{
+ payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
+ payload->health.dimm_dsc = p->dirty_shutdown_counter;
+
+ return sizeof(struct nd_papr_pdsm_health);
+}
+
/* Fetch the DIMM health info and populate it in provided package. */
static int papr_pdsm_health(struct papr_scm_priv *p,
union nd_pdsm_payload *payload)
@@ -606,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p,
/* Populate the fuel gauge meter in the payload */
papr_pdsm_fuel_gauge(p, payload);
+ /* Populate the dirty-shutdown-counter field */
+ papr_pdsm_dsc(p, payload);
rc = sizeof(struct nd_papr_pdsm_health);
@@ -867,15 +923,41 @@ static ssize_t flags_show(struct device *dev,
}
DEVICE_ATTR_RO(flags);
+static ssize_t dirty_shutdown_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *dimm = to_nvdimm(dev);
+ struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+ return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter);
+}
+DEVICE_ATTR_RO(dirty_shutdown);
+
+static umode_t papr_nd_attribute_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct papr_scm_priv *p = nvdimm_provider_data(nvdimm);
+
+ /* If perf-stats are not available, remove the perf_stats sysfs attribute */
+ if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0)
+ return 0;
+
+ return attr->mode;
+}
+
/* papr_scm specific dimm attributes */
static struct attribute *papr_nd_attributes[] = {
&dev_attr_flags.attr,
&dev_attr_perf_stats.attr,
+ &dev_attr_dirty_shutdown.attr,
NULL,
};
static struct attribute_group papr_nd_attribute_group = {
.name = "papr",
+ .is_visible = papr_nd_attribute_visible,
.attrs = papr_nd_attributes,
};
@@ -891,7 +973,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
struct nd_region_desc ndr_desc;
unsigned long dimm_flags;
int target_nid, online_nid;
- ssize_t stat_size;
p->bus_desc.ndctl = papr_scm_ndctl;
p->bus_desc.module = THIS_MODULE;
@@ -914,6 +995,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
dimm_flags = 0;
set_bit(NDD_LABELING, &dimm_flags);
+ /*
+ * Check if the nvdimm is unarmed. No locking needed as we are still
+ * initializing. Ignore error encountered if any.
+ */
+ __drc_pmem_query_health(p);
+
+ if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK)
+ set_bit(NDD_UNARMED, &dimm_flags);
+
p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
if (!p->nvdimm) {
@@ -943,6 +1033,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
ndr_desc.num_mappings = 1;
ndr_desc.nd_set = &p->nd_set;
+ if (p->hcall_flush_required) {
+ set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+ ndr_desc.flush = papr_scm_pmem_flush;
+ }
+
if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
else {
@@ -962,16 +1057,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
list_add_tail(&p->region_list, &papr_nd_regions);
mutex_unlock(&papr_ndr_lock);
- /* Try retriving the stat buffer and see if its supported */
- stat_size = drc_pmem_query_stats(p, NULL, 0);
- if (stat_size > 0) {
- p->stat_buffer_len = stat_size;
- dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
- p->stat_buffer_len);
- } else {
- dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n");
- }
-
return 0;
err: nvdimm_bus_unregister(p->bus);
@@ -1047,8 +1132,10 @@ static int papr_scm_probe(struct platform_device *pdev)
u32 drc_index, metadata_size;
u64 blocks, block_size;
struct papr_scm_priv *p;
+ u8 uuid_raw[UUID_SIZE];
const char *uuid_str;
- u64 uuid[2];
+ ssize_t stat_size;
+ uuid_t uuid;
int rc;
/* check we have all the required DT properties */
@@ -1088,18 +1175,30 @@ static int papr_scm_probe(struct platform_device *pdev)
p->block_size = block_size;
p->blocks = blocks;
p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+ p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
+
+ if (of_property_read_u64(dn, "ibm,persistence-failed-count",
+ &p->dirty_shutdown_counter))
+ p->dirty_shutdown_counter = 0;
/* We just need to ensure that set cookies are unique across */
- uuid_parse(uuid_str, (uuid_t *) uuid);
+ uuid_parse(uuid_str, &uuid);
+
/*
- * cookie1 and cookie2 are not really little endian
- * we store a little endian representation of the
- * uuid str so that we can compare this with the label
- * area cookie irrespective of the endian config with which
- * the kernel is built.
+ * The cookie1 and cookie2 are not really little endian.
+ * We store a raw buffer representation of the
+ * uuid string so that we can compare this with the label
+ * area cookie irrespective of the endian configuration
+ * with which the kernel is built.
+ *
+ * Historically we stored the cookie in the below format.
+ * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
+ * cookie1 was 0xfd423b0b671b5172
+ * cookie2 was 0xaabce8cae35b1d8d
*/
- p->nd_set.cookie1 = cpu_to_le64(uuid[0]);
- p->nd_set.cookie2 = cpu_to_le64(uuid[1]);
+ export_uuid(uuid_raw, &uuid);
+ p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
+ p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
/* might be zero */
p->metadata_size = metadata_size;
@@ -1124,6 +1223,14 @@ static int papr_scm_probe(struct platform_device *pdev)
p->res.name = pdev->name;
p->res.flags = IORESOURCE_MEM;
+ /* Try retrieving the stat buffer and see if it's supported */
+ stat_size = drc_pmem_query_stats(p, NULL, 0);
+ if (stat_size > 0) {
+ p->stat_buffer_len = stat_size;
+ dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
+ p->stat_buffer_len);
+ }
+
rc = papr_scm_nvdimm_init(p);
if (rc)
goto err2;
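The historical cookie layout quoted in the comment above can be checked with a small user-space sketch. This is illustrative only; it uses libuuid rather than the kernel's uuid_parse()/export_uuid()/get_unaligned_le64() helpers:

#include <stdio.h>
#include <stdint.h>
#include <uuid/uuid.h>		/* libuuid; link with -luuid */

/* Unaligned little-endian 64-bit load, like get_unaligned_le64(). */
static uint64_t le64_load(const unsigned char *p)
{
	uint64_t v = 0;
	int i;

	for (i = 7; i >= 0; i--)
		v = (v << 8) | p[i];
	return v;
}

int main(void)
{
	uuid_t raw;	/* 16 raw bytes, same layout as export_uuid() produces */

	if (uuid_parse("72511b67-0b3b-42fd-8d1d-5be3cae8bcaa", raw))
		return 1;

	/* Prints fd423b0b671b5172 and aabce8cae35b1d8d, matching the
	 * historical cookie1/cookie2 values quoted in the comment above. */
	printf("cookie1 %016llx\n", (unsigned long long)le64_load(&raw[0]));
	printf("cookie2 %016llx\n", (unsigned long long)le64_load(&raw[8]));
	return 0;
}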
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1bffbd1c9a94..3b6800f774c2 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -224,8 +224,6 @@ static void __init pSeries_request_regions(void)
void __init pSeries_final_fixup(void)
{
- struct pci_controller *hose;
-
pSeries_request_regions();
eeh_show_enabled();
@@ -234,27 +232,6 @@ void __init pSeries_final_fixup(void)
ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
#endif
- list_for_each_entry(hose, &hose_list, list_node) {
- struct device_node *dn = hose->dn, *nvdn;
-
- while (1) {
- dn = of_find_all_nodes(dn);
- if (!dn)
- break;
- nvdn = of_parse_phandle(dn, "ibm,nvlink", 0);
- if (!nvdn)
- continue;
- if (!of_device_is_compatible(nvdn, "ibm,npu-link"))
- continue;
- if (!of_device_is_compatible(nvdn->parent,
- "ibm,power9-npu"))
- continue;
-#ifdef CONFIG_PPC_POWERNV
- WARN_ON_ONCE(pnv_npu2_init(hose));
-#endif
- break;
- }
- }
}
/*
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index f9ae17e8a0f4..a8f9140a24fa 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic);
int remove_phb_dynamic(struct pci_controller *phb)
{
struct pci_bus *b = phb->bus;
+ struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge);
struct resource *res;
int rc, i;
@@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
/* Remove the PCI bus and unregister the bridge device from sysfs */
phb->bus = NULL;
pci_remove_bus(b);
- device_unregister(b->bridge);
+ host_bridge->bus = NULL;
+ device_unregister(&host_bridge->dev);
/* Now release the IO resource */
if (res->flags & IORESOURCE_IO)
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index e1dc5d3254df..439ac72c2470 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -139,7 +139,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
return rc;
}
-const struct of_device_id drc_pmem_match[] = {
+static const struct of_device_id drc_pmem_match[] = {
{ .type = "ibm,persistent-memory", },
{}
};
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 4fe48c04c6c2..1f051a786fb3 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void);
/* Poweron flag used for enabling auto ups restart */
extern unsigned long rtas_poweron_auto;
-/* Provided by HVC VIO */
-extern void hvc_vio_init_early(void);
-
/* Dynamic logical Partitioning/Mobility */
extern void dlpar_free_cc_nodes(struct device_node *);
extern void dlpar_free_cc_property(struct property *);
@@ -55,6 +52,7 @@ extern int dlpar_attach_node(struct device_node *, struct device_node *);
extern int dlpar_detach_node(struct device_node *);
extern int dlpar_acquire_drc(u32 drc_index);
extern int dlpar_release_drc(u32 drc_index);
+extern int dlpar_unisolate_drc(u32 drc_index);
void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
@@ -111,6 +109,7 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
+extern u32 pseries_security_flavor;
void pseries_setup_security_mitigations(void);
void pseries_lpar_read_hblkrm_characteristics(void);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f8b390a9d9fb..167f2e1b8d39 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
if ((be64_to_cpu(regs->msr) &
(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
- regs->nip = be64_to_cpu((__be64)regs->nip);
- regs->msr = 0;
+ regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
+ regs_set_return_msr(regs, 0);
}
#endif
@@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.severity = MCE_SEV_SEVERE;
else if (severity == RTAS_SEVERITY_ERROR)
mce_err.severity = MCE_SEV_SEVERE;
- else if (severity == RTAS_SEVERITY_FATAL)
- mce_err.severity = MCE_SEV_FATAL;
else
mce_err.severity = MCE_SEV_FATAL;
@@ -699,7 +697,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
break;
case MC_ERROR_TYPE_I_CACHE:
- mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+ mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
break;
case MC_ERROR_TYPE_UNKNOWN:
default:
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
index 81343908ed33..f8f73b47b107 100644
--- a/arch/powerpc/platforms/pseries/rtas-fadump.c
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id)
return i;
}
-void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
{
int i;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 46e1540abc22..631a0d57b6cd 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -71,6 +71,7 @@
#include <asm/swiotlb.h>
#include <asm/svm.h>
#include <asm/dtl.h>
+#include <asm/hvconsole.h>
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
@@ -85,6 +86,7 @@ EXPORT_SYMBOL(CMO_PageSize);
int fwnmi_active; /* TRUE if an FWNMI handler is present */
int ibm_nmi_interlock_token;
+u32 pseries_security_flavor;
static void pSeries_show_cpuinfo(struct seq_file *m)
{
@@ -534,13 +536,28 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
/*
* The features below are enabled by default, so we instead look to see
* if firmware has *disabled* them, and clear them if so.
+ * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if
+ * H_CPU_BEHAV_FAVOUR_SECURITY is.
*/
if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+ else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
+ pseries_security_flavor = 1;
+ else
+ pseries_security_flavor = 2;
if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+ if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
+ if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+
+ if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
+ security_ftr_clear(SEC_FTR_STF_BARRIER);
+
if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index c70b4be9f0a5..096629f54576 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -211,7 +211,9 @@ static __init void pSeries_smp_probe(void)
if (!cpu_has_feature(CPU_FTR_SMT))
return;
- if (check_kvm_guest()) {
+ check_kvm_guest();
+
+ if (is_kvm_guest()) {
/*
* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
* faults to the hypervisor which then reads the instruction
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 7b739cc7a8a9..1d829e257996 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -55,9 +55,9 @@ void __init svm_swiotlb_init(void)
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
return;
- if (io_tlb_start)
- memblock_free_early(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+
+ memblock_free_early(__pa(vstart),
+ PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
panic("SVM: Cannot allocate SWIOTLB buffer");
}
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
new file mode 100644
index 000000000000..b5c1cf1bc64d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
+#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
+/* The hypervisor allows one credit per window right now */
+#define DEF_WIN_CREDS 1
+
+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+static DEFINE_MUTEX(vas_pseries_mutex);
+
+static long hcall_return_busy_check(long rc)
+{
+ /* Check if we are stalled for some time */
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ } else if (rc == H_BUSY) {
+ cond_resched();
+ }
+
+ return rc;
+}
+
+/*
+ * Allocate VAS window hcall
+ */
+static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
+ u8 wintype, u16 credits)
+{
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+ long rc;
+
+ do {
+ rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
+ credits, domain[0], domain[1], domain[2],
+ domain[3], domain[4], domain[5]);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS) {
+ if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
+ pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
+ return -ENOTSUPP;
+ }
+ win->vas_win.winid = retbuf[0];
+ win->win_addr = retbuf[1];
+ win->complete_irq = retbuf[2];
+ win->fault_irq = retbuf[3];
+ return 0;
+ }
+
+ pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
+ rc, wintype, credits);
+
+ return -EIO;
+}
+
+/*
+ * Deallocate VAS window hcall.
+ */
+static int h_deallocate_vas_window(u64 winid)
+{
+ long rc;
+
+ do {
+ rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
+ rc, winid);
+ return -EIO;
+}
+
+/*
+ * Modify VAS window.
+ * After the window is opened with allocate window hcall, configure it
+ * with flags and LPAR PID before using.
+ */
+static int h_modify_vas_window(struct pseries_vas_window *win)
+{
+ long rc;
+ u32 lpid = mfspr(SPRN_PID);
+
+ /*
+ * AMR value is not supported in Linux VAS implementation.
+ * The hypervisor ignores it if 0 is passed.
+ */
+ do {
+ rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
+ win->vas_win.winid, lpid, 0,
+ VAS_MOD_WIN_FLAGS, 0);
+
+ rc = hcall_return_busy_check(rc);
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
+ rc, win->vas_win.winid, lpid);
+ return -EIO;
+}
+
+/*
+ * This hcall is used to determine the capabilities from the hypervisor.
+ * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ * capabilities which provides all feature(s) that are
+ * available. Then query the hypervisor to get the
+ * corresponding capabilities for the specific feature.
+ * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ * and VAS GZIP Default capabilities.
+ * H_QUERY_NX_CAPABILITIES provides NX GZIP
+ * capabilities.
+ * @result: Return buffer to save capabilities.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+ long rc;
+
+ rc = plpar_hcall_norets(hcall, query_type, result);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
+ hcall, rc, query_type, result);
+ return -EIO;
+}
+EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
+
+/*
+ * hcall to get fault CRB from the hypervisor.
+ */
+static int h_get_nx_fault(u32 winid, u64 buffer)
+{
+ long rc;
+
+ rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+ if (rc == H_SUCCESS)
+ return 0;
+
+ pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
+ rc, winid, buffer);
+ return -EIO;
+
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query the
+ * hypervisor to get the fault CRB on the specific fault. Then
+ * process the CRB by updating CSB or send signal if the user space
+ * CSB is invalid.
+ * Note: The hypervisor forwards an interrupt for each fault request.
+ * So one fault CRB to process for each H_GET_NX_FAULT hcall.
+ */
+irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+ struct coprocessor_request_block crb;
+ struct vas_user_win_ref *tsk_ref;
+ int rc;
+
+ rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+ if (!rc) {
+ tsk_ref = &txwin->vas_win.task_ref;
+ vas_dump_crb(&crb);
+ vas_update_csb(&crb, tsk_ref);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Allocate window and setup IRQ mapping.
+ */
+static int allocate_setup_window(struct pseries_vas_window *txwin,
+ u64 *domain, u8 wintype)
+{
+ int rc;
+
+ rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
+ if (rc)
+ return rc;
+ /*
+ * On PowerVM, the hypervisor sets up and forwards the fault
+ * interrupt per window. So the IRQ setup and fault handling
+ * will be done for each open window separately.
+ */
+ txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
+ if (!txwin->fault_virq) {
+ pr_err("Failed irq mapping %d\n", txwin->fault_irq);
+ rc = -EINVAL;
+ goto out_win;
+ }
+
+ txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
+ txwin->vas_win.winid);
+ if (!txwin->name) {
+ rc = -ENOMEM;
+ goto out_irq;
+ }
+
+ rc = request_threaded_irq(txwin->fault_virq, NULL,
+ pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+ txwin->name, txwin);
+ if (rc) {
+ pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+ txwin->vas_win.winid, txwin->fault_virq, rc);
+ goto out_free;
+ }
+
+ txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
+
+ return 0;
+out_free:
+ kfree(txwin->name);
+out_irq:
+ irq_dispose_mapping(txwin->fault_virq);
+out_win:
+ h_deallocate_vas_window(txwin->vas_win.winid);
+ return rc;
+}
+
+static inline void free_irq_setup(struct pseries_vas_window *txwin)
+{
+ free_irq(txwin->fault_virq, txwin);
+ kfree(txwin->name);
+ irq_dispose_mapping(txwin->fault_virq);
+}
+
+static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+ enum vas_cop_type cop_type)
+{
+ long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+ struct vas_cop_feat_caps *cop_feat_caps;
+ struct vas_caps *caps;
+ struct pseries_vas_window *txwin;
+ int rc;
+
+ txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
+ if (!txwin)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * A VAS window can have many credits which means that many
+ * requests can be issued simultaneously. But the hypervisor
+ * restricts one credit per window.
+ * The hypervisor introduces 2 different types of credits:
+ * Default credit type (Uses normal priority FIFO):
+ * A limited number of credits are assigned to partitions
+ * based on processor entitlement. But these credits may be
+ * over-committed on a system, depending on whether the CPUs
+ * are in shared or dedicated modes - that is, more requests
+ * may be issued across the system than NX can service at
+ * once which can result in paste command failure (RMA_busy).
+ * Then the process has to resend requests or fall back to
+ * SW compression.
+ * Quality of Service (QoS) credit type (Uses high priority FIFO):
+ * To avoid NX HW contention, the system admins can assign
+ * QoS credits for each LPAR so that this partition is
+ * guaranteed access to NX resources. These credits are
+ * assigned to partitions via the HMC.
+ * Refer PAPR for more information.
+ *
+ * Allocate window with QoS credits if user requested. Otherwise
+ * default credits are used.
+ */
+ if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
+ caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
+ else
+ caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
+
+ cop_feat_caps = &caps->caps;
+
+ if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
+ atomic_read(&cop_feat_caps->target_lpar_creds)) {
+ pr_err("Credits are not available to allocate window\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (vas_id == -1) {
+ /*
+ * The user space is requesting to allocate a window on
+ * a VAS instance where the process is executing.
+ * On PowerVM, domain values are passed to the hypervisor
+ * to select the VAS instance. Useful if the process has
+ * affinity to a NUMA node.
+ * The hypervisor selects VAS instance if
+ * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
+ * The H_ALLOCATE_VAS_WINDOW hcall is defined to take
+ * domain values as specified by H_HOME_NODE_ASSOCIATIVITY,
+ * so no unpacking needs to be done.
+ */
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+ VPHN_FLAG_VCPU, smp_processor_id());
+ if (rc != H_SUCCESS) {
+ pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+ goto out;
+ }
+ }
+
+ /*
+ * Allocate / Deallocate window hcalls and setup / free IRQs
+ * have to be protected with mutex.
+ * Open VAS window: Allocate window hcall and setup IRQ
+ * Close VAS window: Deallocate window hcall and free IRQ
+ * The hypervisor waits until all NX requests are
+ * completed before closing the window, so it expects
+ * the OS to handle NX faults; this means the IRQ can be
+ * freed only after the deallocate window hcall returns.
+ * If a window is closed with the deallocate hcall before
+ * its IRQ is freed, the hypervisor can hand the same
+ * fault IRQ to a new allocate hcall, and IRQ setup for
+ * the new window can then fail because the old IRQ has
+ * not yet been freed by the OS.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+ cop_feat_caps->win_type);
+ mutex_unlock(&vas_pseries_mutex);
+ if (rc)
+ goto out;
+
+ /*
+ * Modify window and it is ready to use.
+ */
+ rc = h_modify_vas_window(txwin);
+ if (!rc)
+ rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+ if (rc)
+ goto out_free;
+
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+ txwin->win_type = cop_feat_caps->win_type;
+ mutex_lock(&vas_pseries_mutex);
+ list_add(&txwin->win_list, &caps->list);
+ mutex_unlock(&vas_pseries_mutex);
+
+ return &txwin->vas_win;
+
+out_free:
+ /*
+ * Window is not operational. Free the IRQ before closing
+ * the window so that we do not have to hold the mutex.
+ */
+ free_irq_setup(txwin);
+ h_deallocate_vas_window(txwin->vas_win.winid);
+out:
+ atomic_dec(&cop_feat_caps->used_lpar_creds);
+ kfree(txwin);
+ return ERR_PTR(rc);
+}
+
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win;
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+ return win->win_addr;
+}
+
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+ int rc = 0;
+
+ /*
+ * The hypervisor waits until all requests, including
+ * faults, are processed before closing the window -
+ * meaning all credits have to be returned. In the case
+ * of a fault request, a credit is returned after the OS
+ * issues the H_GET_NX_FAULT hcall.
+ * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
+ * hcall.
+ */
+ rc = h_deallocate_vas_window(win->vas_win.winid);
+ if (!rc)
+ free_irq_setup(win);
+
+ return rc;
+}
+
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win;
+ struct vas_cop_feat_caps *caps;
+ int rc = 0;
+
+ if (!vwin)
+ return -EINVAL;
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+
+ /* Should not happen */
+ if (win->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Window (%u): Invalid window type %u\n",
+ vwin->winid, win->win_type);
+ return -EINVAL;
+ }
+
+ caps = &vascaps[win->win_type].caps;
+ mutex_lock(&vas_pseries_mutex);
+ rc = deallocate_free_window(win);
+ if (rc) {
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+ }
+
+ list_del(&win->win_list);
+ atomic_dec(&caps->used_lpar_creds);
+ mutex_unlock(&vas_pseries_mutex);
+
+ put_vas_user_win_ref(&vwin->task_ref);
+ mm_context_remove_vas_window(vwin->task_ref.mm);
+
+ kfree(win);
+ return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = {
+ .open_win = vas_allocate_window, /* Open and configure window */
+ .paste_addr = vas_paste_address, /* To do copy/paste */
+ .close_win = vas_deallocate_window, /* Close window */
+};
+
+/*
+ * Only the nx-gzip coprocessor type is supported now, but this API
+ * code can be extended to other coprocessor types later.
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+ const char *name)
+{
+ int rc;
+
+ if (!copypaste_feat)
+ return -ENOTSUPP;
+
+ rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+ vas_unregister_coproc_api();
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
+
+/*
+ * Get the specific capabilities based on the feature type.
+ * Right now supports GZIP default and GZIP QoS capabilities.
+ */
+static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+ struct hv_vas_cop_feat_caps *hv_caps)
+{
+ struct vas_cop_feat_caps *caps;
+ struct vas_caps *vcaps;
+ int rc = 0;
+
+ vcaps = &vascaps[type];
+ memset(vcaps, 0, sizeof(*vcaps));
+ INIT_LIST_HEAD(&vcaps->list);
+
+ caps = &vcaps->caps;
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ return rc;
+
+ caps->user_mode = hv_caps->user_mode;
+ if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
+ pr_err("User space COPY/PASTE is not supported\n");
+ return -ENOTSUPP;
+ }
+
+ caps->descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps->win_type = hv_caps->win_type;
+ if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Unsupported window type %u\n", caps->win_type);
+ return -EINVAL;
+ }
+ caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+ caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+ atomic_set(&caps->target_lpar_creds,
+ be16_to_cpu(hv_caps->target_lpar_creds));
+ if (feat == VAS_GZIP_DEF_FEAT) {
+ caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+ if (caps->max_win_creds < DEF_WIN_CREDS) {
+ pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+ DEF_WIN_CREDS, caps->max_win_creds);
+ return -EINVAL;
+ }
+ }
+
+ copypaste_feat = true;
+
+ return 0;
+}
+
+static int __init pseries_vas_init(void)
+{
+ struct hv_vas_cop_feat_caps *hv_cop_caps;
+ struct hv_vas_all_caps *hv_caps;
+ int rc;
+
+ /*
+ * Linux supports user space COPY/PASTE only with Radix
+ */
+ if (!radix_enabled()) {
+ pr_err("API is supported only with radix page tables\n");
+ return -ENOTSUPP;
+ }
+
+ hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+ if (!hv_caps)
+ return -ENOMEM;
+ /*
+ * Get VAS overall capabilities by passing 0 to feature type.
+ */
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ goto out;
+
+ caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+ hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
+ if (!hv_cop_caps) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ /*
+ * QOS capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+ rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+ VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+
+ if (rc)
+ goto out_cop;
+ }
+ /*
+ * Default capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+ rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+ VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
+ if (rc)
+ goto out_cop;
+ }
+
+ pr_info("GZIP feature is available\n");
+
+out_cop:
+ kfree(hv_cop_caps);
+out:
+ kfree(hv_caps);
+ return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);
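Consumers such as a GZIP coprocessor driver are expected to hook into this interface through vas_register_api_pseries(). The following is a hypothetical, minimal module sketch; the coprocessor type, names, and device string are illustrative and not part of this patch:

// SPDX-License-Identifier: GPL-2.0-or-later
/* Hypothetical example module, not part of this patch. */
#include <linux/module.h>
#include <asm/vas.h>

static int __init example_gzip_init(void)
{
	/* Hands the pseries vas_user_win_ops (vops_pseries above) to the
	 * common VAS coprocessor API, which exposes a char device that
	 * user space opens to get a GZIP send window. */
	return vas_register_api_pseries(THIS_MODULE, VAS_COP_TYPE_GZIP,
					"example-gzip");
}

static void __exit example_gzip_exit(void)
{
	vas_unregister_api_pseries();
}

module_init(example_gzip_init);
module_exit(example_gzip_exit);
MODULE_LICENSE("GPL");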
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
new file mode 100644
index 000000000000..4ecb3fcabd10
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <asm/vas.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * VAS window modify flags
+ */
+#define VAS_MOD_WIN_CLOSE PPC_BIT(0)
+#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1)
+#define VAS_MOD_WIN_DR PPC_BIT(3)
+#define VAS_MOD_WIN_PR PPC_BIT(4)
+#define VAS_MOD_WIN_SF PPC_BIT(5)
+#define VAS_MOD_WIN_TA PPC_BIT(6)
+#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \
+ VAS_MOD_WIN_PR | VAS_MOD_WIN_SF)
+
+#define VAS_WIN_ACTIVE 0x0
+#define VAS_WIN_CLOSED 0x1
+#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */
+/* Process of being modified, deallocated, or quiesced */
+#define VAS_WIN_MOD_IN_PROCESS 0x3
+
+#define VAS_COPY_PASTE_USER_MODE 0x00000001
+#define VAS_COP_OP_USER_MODE 0x00000010
+
+/*
+ * Co-processor feature - GZIP QoS windows or GZIP default windows
+ */
+enum vas_cop_feat_type {
+ VAS_GZIP_QOS_FEAT_TYPE,
+ VAS_GZIP_DEF_FEAT_TYPE,
+ VAS_MAX_FEAT_TYPE,
+};
+
+/*
+ * Use to get feature specific capabilities from the
+ * hypervisor.
+ */
+struct hv_vas_cop_feat_caps {
+ __be64 descriptor;
+ u8 win_type; /* Default or QoS type */
+ u8 user_mode;
+ __be16 max_lpar_creds;
+ __be16 max_win_creds;
+ union {
+ __be16 reserved;
+ __be16 def_lpar_creds; /* Used for default capabilities */
+ };
+ __be16 target_lpar_creds;
+} __packed __aligned(0x1000);
+
+/*
+ * Feature specific (QoS or default) capabilities.
+ */
+struct vas_cop_feat_caps {
+ u64 descriptor;
+ u8 win_type; /* Default or QoS type */
+ u8 user_mode; /* User mode copy/paste or COP HCALL */
+ u16 max_lpar_creds; /* Max credits available in LPAR */
+ /* Max credits can be assigned per window */
+ u16 max_win_creds;
+ union {
+ u16 reserved; /* Used for QoS credit type */
+ u16 def_lpar_creds; /* Used for default credit type */
+ };
+ /* Total LPAR available credits. Can be different from max LPAR */
+ /* credits due to DLPAR operation */
+ atomic_t target_lpar_creds;
+ atomic_t used_lpar_creds; /* Used credits so far */
+ u16 avail_lpar_creds; /* Remaining available credits */
+};
+
+/*
+ * Feature (QoS or Default) specific to store capabilities and
+ * the list of open windows.
+ */
+struct vas_caps {
+ struct vas_cop_feat_caps caps;
+ struct list_head list; /* List of open windows */
+};
+
+/*
+ * To get window information from the hypervisor.
+ */
+struct hv_vas_win_lpar {
+ __be16 version;
+ u8 win_type;
+ u8 status;
+ __be16 credits; /* No of credits assigned to this window */
+ __be16 reserved;
+ __be32 pid; /* LPAR Process ID */
+ __be32 tid; /* LPAR Thread ID */
+ __be64 win_addr; /* Paste address */
+ __be32 interrupt; /* Interrupt when NX request completes */
+ __be32 fault; /* Interrupt when NX sees fault */
+ /* Associativity Domain Identifiers as returned in */
+ /* H_HOME_NODE_ASSOCIATIVITY */
+ __be64 domain[6];
+ __be64 win_util; /* Number of bytes processed */
+} __packed __aligned(0x1000);
+
+struct pseries_vas_window {
+ struct vas_window vas_win;
+ u64 win_addr; /* Physical paste address */
+ u8 win_type; /* QoS or Default window */
+ u32 complete_irq; /* Completion interrupt */
+ u32 fault_irq; /* Fault interrupt */
+ u64 domain[6]; /* Associativity domain Ids in which this window is allocated */
+ u64 util;
+
+ /* List of windows opened which is used for LPM */
+ struct list_head win_list;
+ u64 flags;
+ char *name;
+ int fault_virq;
+};
+#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index b2797cfe4e2b..e00f3725ec96 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-map-ops.h>
#include <linux/kobject.h>
+#include <linux/kexec.h>
#include <asm/iommu.h>
#include <asm/dma.h>
@@ -1261,7 +1262,6 @@ static int vio_bus_remove(struct device *dev)
struct vio_dev *viodev = to_vio_dev(dev);
struct vio_driver *viodrv = to_vio_driver(dev->driver);
struct device *devptr;
- int ret = 1;
/*
* Hold a reference to the device after the remove function is called
@@ -1270,13 +1270,27 @@ static int vio_bus_remove(struct device *dev)
devptr = get_device(dev);
if (viodrv->remove)
- ret = viodrv->remove(viodev);
+ viodrv->remove(viodev);
- if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+ if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_bus_remove(viodev);
put_device(devptr);
- return ret;
+ return 0;
+}
+
+static void vio_bus_shutdown(struct device *dev)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ struct vio_driver *viodrv;
+
+ if (dev->driver) {
+ viodrv = to_vio_driver(dev->driver);
+ if (viodrv->shutdown)
+ viodrv->shutdown(viodev);
+ else if (kexec_in_progress)
+ vio_bus_remove(dev);
+ }
}
/**
@@ -1286,6 +1300,10 @@ static int vio_bus_remove(struct device *dev)
int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
const char *mod_name)
{
+ // vio_bus_type is only initialised for pseries
+ if (!machine_is(pseries))
+ return -ENODEV;
+
pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
/* fill in 'struct driver' fields */
@@ -1614,6 +1632,7 @@ struct bus_type vio_bus_type = {
.match = vio_bus_match,
.probe = vio_bus_probe,
.remove = vio_bus_remove,
+ .shutdown = vio_bus_shutdown,
};
/**