diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries')
25 files changed, 1170 insertions, 140 deletions
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index c8a2b0b05ac0..4cda0ef87be0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PPC_VAS) += vas.o diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 233503fcf8f0..b1f01ac0c29e 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_UNUSABLE) return -1; @@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_PRESENT) return -1; @@ -329,6 +327,19 @@ int dlpar_release_drc(u32 drc_index) return 0; } +int dlpar_unisolate_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + + return 0; +} + int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 12cbffd3c2e3..7e970f81d8ff 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -47,9 +47,6 @@ static void rtas_stop_self(void) BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); - printk("cpu %u (hwid %u) Ready to die...\n", - smp_processor_id(), hard_smp_processor_id()); - rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); panic("Alas, I survived.\n"); @@ -271,6 +268,19 @@ static int dlpar_offline_cpu(struct device_node *dn) if (!cpu_online(cpu)) break; + /* + * device_offline() will return -EBUSY (via cpu_down()) if there + * is only one CPU left. Check it here to fail earlier and with a + * more informative error message, while also retaining the + * cpu_add_remove_lock to be sure that no CPUs are being + * online/offlined during this check. + */ + if (num_online_cpus() == 1) { + pr_warn("Unable to remove last online CPU %pOFn\n", dn); + rc = -EBUSY; + goto out_unlock; + } + cpu_maps_update_done(); rc = device_offline(get_cpu_device(cpu)); if (rc) @@ -283,6 +293,7 @@ static int dlpar_offline_cpu(struct device_node *dn) thread); } } +out_unlock: cpu_maps_update_done(); out: @@ -802,8 +813,16 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_REMOVE: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) rc = dlpar_cpu_remove_by_count(count); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { rc = dlpar_cpu_remove_by_index(drc_index); + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the CPU removal failed. + */ + if (rc) + dlpar_unisolate_drc(drc_index); + } else rc = -EINVAL; break; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8377f1f7c78e..377d852f5a9a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np) static bool lmb_is_removable(struct drmem_lmb *lmb) { - if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) return false; #ifdef CONFIG_FA_DUMP @@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { struct drmem_lmb *lmb; - int lmbs_removed = 0; + int lmbs_reserved = 0; int lmbs_available = 0; int rc; @@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) */ drmem_mark_lmb_reserved(lmb); - lmbs_removed++; - if (lmbs_removed == lmbs_to_remove) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_remove) break; } - if (lmbs_removed != lmbs_to_remove) { + if (lmbs_reserved != lmbs_to_remove) { pr_err("Memory hot-remove failed, adding LMB's back\n"); for_each_drmem_lmb(lmb) { @@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->drc_index); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = -EINVAL; @@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->base_addr); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = 0; } @@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-remove %u LMB(s) at %x\n", @@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (rc) return -EINVAL; - /* Validate that there are enough LMBs to satisfy the request */ + /* + * Validate that all LMBs in range are not reserved. Note that it + * is ok if they are !ASSIGNED since our goal here is to remove the + * LMB range, regardless of whether some LMBs were already removed + * by any other reason. + * + * This is a contrast to what is done in remove_by_count() where we + * check for both RESERVED and !ASSIGNED (via lmb_is_removable()), + * because we want to remove a fixed amount of LMBs in that function. + */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_remove) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + /* + * dlpar_remove_lmb() will error out if the LMB is already + * !ASSIGNED, but this case is a no-op for us. + */ if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; @@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (!drmem_lmb_reserved(lmb)) continue; + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the LMB removal failed. + */ + dlpar_unisolate_drc(lmb->drc_index); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB, drc index %x\n", @@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np) { return 0; } -static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) -{ - return -EOPNOTSUPP; -} static int dlpar_remove_lmb(struct drmem_lmb *lmb) { return -EOPNOTSUPP; @@ -651,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) { struct drmem_lmb *lmb; int lmbs_available = 0; - int lmbs_added = 0; + int lmbs_reserved = 0; int rc; pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); @@ -661,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) /* Validate that there are enough LMBs to satisfy the request */ for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) + continue; + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) lmbs_available++; @@ -689,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) * requested LMBs cannot be added. */ drmem_mark_lmb_reserved(lmb); - - lmbs_added++; - if (lmbs_added == lmbs_to_add) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_add) break; } - if (lmbs_added != lmbs_to_add) { + if (lmbs_reserved != lmbs_to_add) { pr_err("Memory hot-add failed, removing any added LMBs\n"); for_each_drmem_lmb(lmb) { @@ -710,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) dlpar_release_drc(lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = -EINVAL; } else { @@ -720,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) pr_debug("Memory at %llx (drc index %x) was hot-added\n", lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = 0; } @@ -764,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index) static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-add %u LMB(s) at index %x\n", @@ -779,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) /* Validate that the LMBs in this range are not reserved */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + /* Fail immediately if the whole range can't be hot-added */ + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_add) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 2136e42833af..ab9fc6506861 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -102,6 +102,20 @@ END_FTR_SECTION(0, 1); \ #define HCALL_BRANCH(LABEL) #endif +_GLOBAL_TOC(plpar_hcall_norets_notrace) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HVSC /* invoke the hypervisor */ + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM @@ -110,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets) HCALL_BRANCH(plpar_hcall_norets_trace) HVSC /* invoke the hypervisor */ + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -119,6 +136,10 @@ plpar_hcall_norets_trace: HCALL_INST_PRECALL(R4) HVSC HCALL_INST_POSTCALL_NORETS + + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr @@ -149,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -178,6 +202,9 @@ plpar_hcall_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -213,6 +240,9 @@ _GLOBAL(plpar_hcall_raw) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -252,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -290,6 +323,9 @@ plpar_hcall9_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -329,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 2c59b4986ea5..3a50612a78db 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,7 +26,7 @@ struct hcall_stats { }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) -DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); +static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); /* * Routines for displaying the statistics in debugfs diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index a15ab33646b3..c6c79ef55e13 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -42,6 +42,7 @@ #include <linux/kobject.h> #include <linux/dma-map-ops.h> #include <linux/interrupt.h> +#include <linux/irqdomain.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/stat.h> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 9fc5217f0c8e..0c55b991f665 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -638,7 +638,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) iommu_table_setparms(pci->phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, pci->phb->node, 0, 0); + if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -720,7 +721,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) iommu_table_setparms_lpar(ppci->phb, pdn, tbl, ppci->table_group, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - iommu_init_table(tbl, ppci->phb->node, 0, 0); + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -749,7 +751,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, phb->node, 0, 0); + if (!iommu_init_table(tbl, phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + set_iommu_table_base(&dev->dev, tbl); return; } @@ -1099,6 +1103,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) ret); } +/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ +static int iommu_get_page_shift(u32 query_page_size) +{ + /* Supported IO page-sizes according to LoPAR */ + const int shift[] = { + __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), + __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), + __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G) + }; + + int i = ARRAY_SIZE(shift) - 1; + + /* + * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: + * - bit 31 means 4k pages are supported, + * - bit 30 means 64k pages are supported, and so on. + * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. + */ + for (; i >= 0 ; i--) { + if (query_page_size & (1 << i)) + return shift[i]; + } + + /* No valid page size found. */ + return 0; +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1206,13 +1237,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_failed; } } - if (query.page_size & 4) { - page_shift = 24; /* 16MB */ - } else if (query.page_size & 2) { - page_shift = 16; /* 64kB */ - } else if (query.page_size & 1) { - page_shift = 12; /* 4kB */ - } else { + + page_shift = iommu_get_page_shift(query.page_size); + if (!page_shift) { dev_dbg(&dev->dev, "no supported direct page size in mask %x", query.page_size); goto out_failed; @@ -1229,7 +1256,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (pmem_present) { if (query.largest_available_block >= (1ULL << (MAX_PHYSMEM_BITS - page_shift))) - len = MAX_PHYSMEM_BITS - page_shift; + len = MAX_PHYSMEM_BITS; else dev_info(&dev->dev, "Skipping ibm,pmemory"); } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 764170fdb0f7..dab356e3ff87 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -887,7 +887,8 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, want_v = hpte_encode_avpn(vpn, psize, ssize); - flags = (newpp & 7) | H_AVPN; + flags = (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO)) | H_AVPN; + flags |= (newpp & HPTE_R_KEY_HI) >> 48; if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; @@ -976,11 +977,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - flags = newpp & 7; + flags = newpp & (HPTE_R_PP | HPTE_R_N); if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; + flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO); + lpar_rc = plpar_pte_protect(flags, slot, 0); BUG_ON(lpar_rc != H_SUCCESS); @@ -1629,7 +1632,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) } msleep(delay); rc = plpar_resize_hpt_prepare(0, shift); - }; + } switch (rc) { case H_SUCCESS: @@ -1826,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void) #endif /* - * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. + * Keep track of hcall tracing depth and prevent recursion. Warn if any is + * detected because it may indicate a problem. This will not catch all + * problems with tracing code making hcalls, because the tracing might have + * been invoked from a non-hcall, so the first hcall could recurse into it + * without warning here, but this better than nothing. + * + * Hcalls with specific problems being traced should use the _notrace + * plpar_hcall variants. */ static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); -void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { unsigned long flags; unsigned int *depth; - /* - * We cannot call tracepoints inside RCU idle regions which - * means we must not trace H_CEDE. - */ - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (WARN_ON_ONCE(*depth)) goto out; (*depth)++; @@ -1861,19 +1862,16 @@ out: local_irq_restore(flags); } -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) +notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) { unsigned long flags; unsigned int *depth; - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (*depth) /* Don't warn again on the way out */ goto out; (*depth)++; diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e278390ab28d..f71eac74ea92 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -537,6 +537,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_em_data(m); maxmem_data(m); + seq_printf(m, "security_flavor=%u\n", pseries_security_flavor); + return 0; } diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index ea4d6a660e0d..e83e0891272d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -452,12 +452,28 @@ static int do_suspend(void) return ret; } +/** + * struct pseries_suspend_info - State shared between CPUs for join/suspend. + * @counter: Threads are to increment this upon resuming from suspend + * or if an error is received from H_JOIN. The thread which performs + * the first increment (i.e. sets it to 1) is responsible for + * waking the other threads. + * @done: False if join/suspend is in progress. True if the operation is + * complete (successful or not). + */ +struct pseries_suspend_info { + atomic_t counter; + bool done; +}; + static int do_join(void *arg) { - atomic_t *counter = arg; + struct pseries_suspend_info *info = arg; + atomic_t *counter = &info->counter; long hvrc; int ret; +retry: /* Must ensure MSR.EE off for H_JOIN. */ hard_irq_disable(); hvrc = plpar_hcall_norets(H_JOIN); @@ -473,8 +489,20 @@ static int do_join(void *arg) case H_SUCCESS: /* * The suspend is complete and this cpu has received a - * prod. + * prod, or we've received a stray prod from unrelated + * code (e.g. paravirt spinlocks) and we need to join + * again. + * + * This barrier orders the return from H_JOIN above vs + * the load of info->done. It pairs with the barrier + * in the wakeup/prod path below. */ + smp_mb(); + if (READ_ONCE(info->done) == false) { + pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying", + smp_processor_id()); + goto retry; + } ret = 0; break; case H_BAD_MODE: @@ -488,6 +516,13 @@ static int do_join(void *arg) if (atomic_inc_return(counter) == 1) { pr_info("CPU %u waking all threads\n", smp_processor_id()); + WRITE_ONCE(info->done, true); + /* + * This barrier orders the store to info->done vs subsequent + * H_PRODs to wake the other CPUs. It pairs with the barrier + * in the H_SUCCESS case above. + */ + smp_mb(); prod_others(); } /* @@ -535,11 +570,16 @@ static int pseries_suspend(u64 handle) int ret; while (true) { - atomic_t counter = ATOMIC_INIT(0); + struct pseries_suspend_info info; unsigned long vasi_state; int vasi_err; - ret = stop_machine(do_join, &counter, cpu_online_mask); + info = (struct pseries_suspend_info) { + .counter = ATOMIC_INIT(0), + .done = false, + }; + + ret = stop_machine(do_join, &info, cpu_online_mask); if (ret == 0) break; /* diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index b3ac2455faad..637300330507 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Michael Ellerman, IBM Corp. */ +#include <linux/crash_dump.h> #include <linux/device.h> #include <linux/irq.h> #include <linux/msi.h> @@ -458,8 +459,28 @@ again: return hwirq; } - virq = irq_create_mapping_affinity(NULL, hwirq, - entry->affinity); + /* + * Depending on the number of online CPUs in the original + * kernel, it is likely for CPU #0 to be offline in a kdump + * kernel. The associated IRQs in the affinity mappings + * provided by irq_create_affinity_masks() are thus not + * started by irq_startup(), as per-design for managed IRQs. + * This can be a problem with multi-queue block devices driven + * by blk-mq : such a non-started IRQ is very likely paired + * with the single queue enforced by blk-mq during kdump (see + * blk_mq_alloc_tag_set()). This causes the device to remain + * silent and likely hangs the guest at some point. + * + * We don't really care for fine-grained affinity when doing + * kdump actually : simply ignore the pre-computed affinity + * masks in this case and let the default mask with all CPUs + * be used when creating the IRQ mappings. + */ + if (is_kdump_kernel()) + virq = irq_create_mapping(NULL, hwirq); + else + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 835163f54244..f48e87ac89c9 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -18,6 +18,7 @@ #include <asm/plpar_wrappers.h> #include <asm/papr_pdsm.h> #include <asm/mce.h> +#include <asm/unaligned.h> #define BIND_ANY_ADDR (~0ul) @@ -93,6 +94,7 @@ struct papr_scm_priv { uint64_t block_size; int metadata_size; bool is_volatile; + bool hcall_flush_required; uint64_t bound_addr; @@ -113,10 +115,45 @@ struct papr_scm_priv { /* Health information for the dimm */ u64 health_bitmap; + /* Holds the last known dirty shutdown counter value */ + u64 dirty_shutdown_counter; + /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; }; +static int papr_scm_pmem_flush(struct nd_region *nd_region, + struct bio *bio __maybe_unused) +{ + struct papr_scm_priv *p = nd_region_provider_data(nd_region); + unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0; + long rc; + + dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index); + + do { + rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token); + token = ret_buf[0]; + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); + + if (rc) { + dev_err(&p->pdev->dev, "flush error: %ld", rc); + rc = -EIO; + } else { + dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); + } + + return rc; +} + static LIST_HEAD(papr_nd_regions); static DEFINE_MUTEX(papr_ndr_lock); @@ -227,7 +264,7 @@ err_out: * Query the Dimm performance stats from PHYP and copy them (if returned) to * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast * (num_stats + header) bytes. - * - If buff_stats == NULL the return value is the size in byes of the buffer + * - If buff_stats == NULL the return value is the size in bytes of the buffer * needed to hold all supported performance-statistics. * - If buff_stats != NULL and num_stats == 0 then we copy all known * performance-statistics to 'buff_stat' and expect to be large enough to @@ -277,6 +314,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, dev_err(&p->pdev->dev, "Unknown performance stats, Err:0x%016lX\n", ret[0]); return -ENOENT; + } else if (rc == H_AUTHORITY) { + dev_info(&p->pdev->dev, + "Permission denied while accessing performance stats"); + return -EPERM; + } else if (rc == H_UNSUPPORTED) { + dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); + return -EOPNOTSUPP; } else if (rc != H_SUCCESS) { dev_err(&p->pdev->dev, "Failed to query performance stats, Err:%lld\n", rc); @@ -563,6 +607,16 @@ free_stats: return rc; } +/* Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_dsc(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; + payload->health.dimm_dsc = p->dirty_shutdown_counter; + + return sizeof(struct nd_papr_pdsm_health); +} + /* Fetch the DIMM health info and populate it in provided package. */ static int papr_pdsm_health(struct papr_scm_priv *p, union nd_pdsm_payload *payload) @@ -606,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p, /* Populate the fuel gauge meter in the payload */ papr_pdsm_fuel_gauge(p, payload); + /* Populate the dirty-shutdown-counter field */ + papr_pdsm_dsc(p, payload); rc = sizeof(struct nd_papr_pdsm_health); @@ -867,15 +923,41 @@ static ssize_t flags_show(struct device *dev, } DEVICE_ATTR_RO(flags); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); +} +DEVICE_ATTR_RO(dirty_shutdown); + +static umode_t papr_nd_attribute_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); + + /* For if perf-stats not available remove perf_stats sysfs */ + if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) + return 0; + + return attr->mode; +} + /* papr_scm specific dimm attributes */ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, + &dev_attr_dirty_shutdown.attr, NULL, }; static struct attribute_group papr_nd_attribute_group = { .name = "papr", + .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, }; @@ -891,7 +973,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_region_desc ndr_desc; unsigned long dimm_flags; int target_nid, online_nid; - ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -914,6 +995,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); + /* + * Check if the nvdimm is unarmed. No locking needed as we are still + * initializing. Ignore error encountered if any. + */ + __drc_pmem_query_health(p); + + if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) + set_bit(NDD_UNARMED, &dimm_flags); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { @@ -943,6 +1033,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.num_mappings = 1; ndr_desc.nd_set = &p->nd_set; + if (p->hcall_flush_required) { + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + ndr_desc.flush = papr_scm_pmem_flush; + } + if (p->is_volatile) p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); else { @@ -962,16 +1057,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) list_add_tail(&p->region_list, &papr_nd_regions); mutex_unlock(&papr_ndr_lock); - /* Try retriving the stat buffer and see if its supported */ - stat_size = drc_pmem_query_stats(p, NULL, 0); - if (stat_size > 0) { - p->stat_buffer_len = stat_size; - dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", - p->stat_buffer_len); - } else { - dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); - } - return 0; err: nvdimm_bus_unregister(p->bus); @@ -1047,8 +1132,10 @@ static int papr_scm_probe(struct platform_device *pdev) u32 drc_index, metadata_size; u64 blocks, block_size; struct papr_scm_priv *p; + u8 uuid_raw[UUID_SIZE]; const char *uuid_str; - u64 uuid[2]; + ssize_t stat_size; + uuid_t uuid; int rc; /* check we have all the required DT properties */ @@ -1088,18 +1175,30 @@ static int papr_scm_probe(struct platform_device *pdev) p->block_size = block_size; p->blocks = blocks; p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); + p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); + + if (of_property_read_u64(dn, "ibm,persistence-failed-count", + &p->dirty_shutdown_counter)) + p->dirty_shutdown_counter = 0; /* We just need to ensure that set cookies are unique across */ - uuid_parse(uuid_str, (uuid_t *) uuid); + uuid_parse(uuid_str, &uuid); + /* - * cookie1 and cookie2 are not really little endian - * we store a little endian representation of the - * uuid str so that we can compare this with the label - * area cookie irrespective of the endian config with which - * the kernel is built. + * The cookie1 and cookie2 are not really little endian. + * We store a raw buffer representation of the + * uuid string so that we can compare this with the label + * area cookie irrespective of the endian configuration + * with which the kernel is built. + * + * Historically we stored the cookie in the below format. + * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa + * cookie1 was 0xfd423b0b671b5172 + * cookie2 was 0xaabce8cae35b1d8d */ - p->nd_set.cookie1 = cpu_to_le64(uuid[0]); - p->nd_set.cookie2 = cpu_to_le64(uuid[1]); + export_uuid(uuid_raw, &uuid); + p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); + p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); /* might be zero */ p->metadata_size = metadata_size; @@ -1124,6 +1223,14 @@ static int papr_scm_probe(struct platform_device *pdev) p->res.name = pdev->name; p->res.flags = IORESOURCE_MEM; + /* Try retrieving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } + rc = papr_scm_nvdimm_init(p); if (rc) goto err2; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 1bffbd1c9a94..3b6800f774c2 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -224,8 +224,6 @@ static void __init pSeries_request_regions(void) void __init pSeries_final_fixup(void) { - struct pci_controller *hose; - pSeries_request_regions(); eeh_show_enabled(); @@ -234,27 +232,6 @@ void __init pSeries_final_fixup(void) ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable; ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable; #endif - list_for_each_entry(hose, &hose_list, list_node) { - struct device_node *dn = hose->dn, *nvdn; - - while (1) { - dn = of_find_all_nodes(dn); - if (!dn) - break; - nvdn = of_parse_phandle(dn, "ibm,nvlink", 0); - if (!nvdn) - continue; - if (!of_device_is_compatible(nvdn, "ibm,npu-link")) - continue; - if (!of_device_is_compatible(nvdn->parent, - "ibm,power9-npu")) - continue; -#ifdef CONFIG_PPC_POWERNV - WARN_ON_ONCE(pnv_npu2_init(hose)); -#endif - break; - } - } } /* diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index f9ae17e8a0f4..a8f9140a24fa 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic); int remove_phb_dynamic(struct pci_controller *phb) { struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); struct resource *res; int rc, i; @@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb) /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); - device_unregister(b->bridge); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index e1dc5d3254df..439ac72c2470 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -139,7 +139,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) return rc; } -const struct of_device_id drc_pmem_match[] = { +static const struct of_device_id drc_pmem_match[] = { { .type = "ibm,persistent-memory", }, {} }; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4fe48c04c6c2..1f051a786fb3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void); /* Poweron flag used for enabling auto ups restart */ extern unsigned long rtas_poweron_auto; -/* Provided by HVC VIO */ -extern void hvc_vio_init_early(void); - /* Dynamic logical Partitioning/Mobility */ extern void dlpar_free_cc_nodes(struct device_node *); extern void dlpar_free_cc_property(struct property *); @@ -55,6 +52,7 @@ extern int dlpar_attach_node(struct device_node *, struct device_node *); extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 drc_index); extern int dlpar_release_drc(u32 drc_index); +extern int dlpar_unisolate_drc(u32 drc_index); void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); @@ -111,6 +109,7 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); +extern u32 pseries_security_flavor; void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f8b390a9d9fb..167f2e1b8d39 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs) if ((be64_to_cpu(regs->msr) & (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { - regs->nip = be64_to_cpu((__be64)regs->nip); - regs->msr = 0; + regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); + regs_set_return_msr(regs, 0); } #endif @@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.severity = MCE_SEV_SEVERE; else if (severity == RTAS_SEVERITY_ERROR) mce_err.severity = MCE_SEV_SEVERE; - else if (severity == RTAS_SEVERITY_FATAL) - mce_err.severity = MCE_SEV_FATAL; else mce_err.severity = MCE_SEV_FATAL; @@ -699,7 +697,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.error_type = MCE_ERROR_TYPE_DCACHE; break; case MC_ERROR_TYPE_I_CACHE: - mce_err.error_type = MCE_ERROR_TYPE_DCACHE; + mce_err.error_type = MCE_ERROR_TYPE_ICACHE; break; case MC_ERROR_TYPE_UNKNOWN: default: diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 81343908ed33..f8f73b47b107 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id) return i; } -void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) { int i; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 46e1540abc22..631a0d57b6cd 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,7 @@ #include <asm/swiotlb.h> #include <asm/svm.h> #include <asm/dtl.h> +#include <asm/hvconsole.h> #include "pseries.h" #include "../../../../drivers/pci/pci.h" @@ -85,6 +86,7 @@ EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ int ibm_nmi_interlock_token; +u32 pseries_security_flavor; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -534,13 +536,28 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. + * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if + * H_CPU_BEHAV_FAVOUR_SECURITY is. */ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 1; + else + pseries_security_flavor = 2; if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + + if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER) + security_ftr_clear(SEC_FTR_STF_BARRIER); + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index c70b4be9f0a5..096629f54576 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -211,7 +211,9 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (check_kvm_guest()) { + check_kvm_guest(); + + if (is_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 7b739cc7a8a9..1d829e257996 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -55,9 +55,9 @@ void __init svm_swiotlb_init(void) if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false)) return; - if (io_tlb_start) - memblock_free_early(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + + memblock_free_early(__pa(vstart), + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); panic("SVM: Cannot allocate SWIOTLB buffer"); } diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c new file mode 100644 index 000000000000..b5c1cf1bc64d --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020-21 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <asm/machdep.h> +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#include <asm/vas.h> +#include "vas.h" + +#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul +#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul +/* The hypervisor allows one credit per window right now */ +#define DEF_WIN_CREDS 1 + +static struct vas_all_caps caps_all; +static bool copypaste_feat; + +static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; +static DEFINE_MUTEX(vas_pseries_mutex); + +static long hcall_return_busy_check(long rc) +{ + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + return rc; +} + +/* + * Allocate VAS window hcall + */ +static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, + u8 wintype, u16 credits) +{ + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; + + do { + rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, + credits, domain[0], domain[1], domain[2], + domain[3], domain[4], domain[5]); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) { + if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { + pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + win->vas_win.winid = retbuf[0]; + win->win_addr = retbuf[1]; + win->complete_irq = retbuf[2]; + win->fault_irq = retbuf[3]; + return 0; + } + + pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", + rc, wintype, credits); + + return -EIO; +} + +/* + * Deallocate VAS window hcall. + */ +static int h_deallocate_vas_window(u64 winid) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", + rc, winid); + return -EIO; +} + +/* + * Modify VAS window. + * After the window is opened with allocate window hcall, configure it + * with flags and LPAR PID before using. + */ +static int h_modify_vas_window(struct pseries_vas_window *win) +{ + long rc; + u32 lpid = mfspr(SPRN_PID); + + /* + * AMR value is not supported in Linux VAS implementation. + * The hypervisor ignores it if 0 is passed. + */ + do { + rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, + win->vas_win.winid, lpid, 0, + VAS_MOD_WIN_FLAGS, 0); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", + rc, win->vas_win.winid, lpid); + return -EIO; +} + +/* + * This hcall is used to determine the capabilities from the hypervisor. + * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES + * @query_type: If 0 is passed, the hypervisor returns the overall + * capabilities which provides all feature(s) that are + * available. Then query the hypervisor to get the + * corresponding capabilities for the specific feature. + * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS + * and VAS GZIP Default capabilities. + * H_QUERY_NX_CAPABILITIES provides NX GZIP + * capabilities. + * @result: Return buffer to save capabilities. + */ +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) +{ + long rc; + + rc = plpar_hcall_norets(hcall, query_type, result); + + if (rc == H_SUCCESS) + return 0; + + pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", + hcall, rc, query_type, result); + return -EIO; +} +EXPORT_SYMBOL_GPL(h_query_vas_capabilities); + +/* + * hcall to get fault CRB from the hypervisor. + */ +static int h_get_nx_fault(u32 winid, u64 buffer) +{ + long rc; + + rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", + rc, winid, buffer); + return -EIO; + +} + +/* + * Handle the fault interrupt. + * When the fault interrupt is received for each window, query the + * hypervisor to get the fault CRB on the specific fault. Then + * process the CRB by updating CSB or send signal if the user space + * CSB is invalid. + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; + struct vas_user_win_ref *tsk_ref; + int rc; + + rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); + if (!rc) { + tsk_ref = &txwin->vas_win.task_ref; + vas_dump_crb(&crb); + vas_update_csb(&crb, tsk_ref); + } + + return IRQ_HANDLED; +} + +/* + * Allocate window and setup IRQ mapping. + */ +static int allocate_setup_window(struct pseries_vas_window *txwin, + u64 *domain, u8 wintype) +{ + int rc; + + rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); + if (rc) + return rc; + /* + * On PowerVM, the hypervisor setup and forwards the fault + * interrupt per window. So the IRQ setup and fault handling + * will be done for each open window separately. + */ + txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); + if (!txwin->fault_virq) { + pr_err("Failed irq mapping %d\n", txwin->fault_irq); + rc = -EINVAL; + goto out_win; + } + + txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", + txwin->vas_win.winid); + if (!txwin->name) { + rc = -ENOMEM; + goto out_irq; + } + + rc = request_threaded_irq(txwin->fault_virq, NULL, + pseries_vas_fault_thread_fn, IRQF_ONESHOT, + txwin->name, txwin); + if (rc) { + pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", + txwin->vas_win.winid, txwin->fault_virq, rc); + goto out_free; + } + + txwin->vas_win.wcreds_max = DEF_WIN_CREDS; + + return 0; +out_free: + kfree(txwin->name); +out_irq: + irq_dispose_mapping(txwin->fault_virq); +out_win: + h_deallocate_vas_window(txwin->vas_win.winid); + return rc; +} + +static inline void free_irq_setup(struct pseries_vas_window *txwin) +{ + free_irq(txwin->fault_virq, txwin); + kfree(txwin->name); + irq_dispose_mapping(txwin->fault_virq); +} + +static struct vas_window *vas_allocate_window(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *cop_feat_caps; + struct vas_caps *caps; + struct pseries_vas_window *txwin; + int rc; + + txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); + if (!txwin) + return ERR_PTR(-ENOMEM); + + /* + * A VAS window can have many credits which means that many + * requests can be issued simultaneously. But the hypervisor + * restricts one credit per window. + * The hypervisor introduces 2 different types of credits: + * Default credit type (Uses normal priority FIFO): + * A limited number of credits are assigned to partitions + * based on processor entitlement. But these credits may be + * over-committed on a system depends on whether the CPUs + * are in shared or dedicated modes - that is, more requests + * may be issued across the system than NX can service at + * once which can result in paste command failure (RMA_busy). + * Then the process has to resend requests or fall-back to + * SW compression. + * Quality of Service (QoS) credit type (Uses high priority FIFO): + * To avoid NX HW contention, the system admins can assign + * QoS credits for each LPAR so that this partition is + * guaranteed access to NX resources. These credits are + * assigned to partitions via the HMC. + * Refer PAPR for more information. + * + * Allocate window with QoS credits if user requested. Otherwise + * default credits are used. + */ + if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) + caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; + else + caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; + + cop_feat_caps = &caps->caps; + + if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > + atomic_read(&cop_feat_caps->target_lpar_creds)) { + pr_err("Credits are not available to allocate window\n"); + rc = -EINVAL; + goto out; + } + + if (vas_id == -1) { + /* + * The user space is requesting to allocate a window on + * a VAS instance where the process is executing. + * On PowerVM, domain values are passed to the hypervisor + * to select VAS instance. Useful if the process is + * affinity to NUMA node. + * The hypervisor selects VAS instance if + * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. + * The h_allocate_vas_window hcall is defined to take a + * domain values as specified by h_home_node_associativity, + * So no unpacking needs to be done. + */ + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, + VPHN_FLAG_VCPU, smp_processor_id()); + if (rc != H_SUCCESS) { + pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); + goto out; + } + } + + /* + * Allocate / Deallocate window hcalls and setup / free IRQs + * have to be protected with mutex. + * Open VAS window: Allocate window hcall and setup IRQ + * Close VAS window: Deallocate window hcall and free IRQ + * The hypervisor waits until all NX requests are + * completed before closing the window. So expects OS + * to handle NX faults, means IRQ can be freed only + * after the deallocate window hcall is returned. + * So once the window is closed with deallocate hcall before + * the IRQ is freed, it can be assigned to new allocate + * hcall with the same fault IRQ by the hypervisor. It can + * result in setup IRQ fail for the new window since the + * same fault IRQ is not freed by the OS before. + */ + mutex_lock(&vas_pseries_mutex); + rc = allocate_setup_window(txwin, (u64 *)&domain[0], + cop_feat_caps->win_type); + mutex_unlock(&vas_pseries_mutex); + if (rc) + goto out; + + /* + * Modify window and it is ready to use. + */ + rc = h_modify_vas_window(txwin); + if (!rc) + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) + goto out_free; + + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + txwin->win_type = cop_feat_caps->win_type; + mutex_lock(&vas_pseries_mutex); + list_add(&txwin->win_list, &caps->list); + mutex_unlock(&vas_pseries_mutex); + + return &txwin->vas_win; + +out_free: + /* + * Window is not operational. Free IRQ before closing + * window so that do not have to hold mutex. + */ + free_irq_setup(txwin); + h_deallocate_vas_window(txwin->vas_win.winid); +out: + atomic_dec(&cop_feat_caps->used_lpar_creds); + kfree(txwin); + return ERR_PTR(rc); +} + +static u64 vas_paste_address(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + return win->win_addr; +} + +static int deallocate_free_window(struct pseries_vas_window *win) +{ + int rc = 0; + + /* + * The hypervisor waits for all requests including faults + * are processed before closing the window - Means all + * credits have to be returned. In the case of fault + * request, a credit is returned after OS issues + * H_GET_NX_FAULT hcall. + * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW + * hcall. + */ + rc = h_deallocate_vas_window(win->vas_win.winid); + if (!rc) + free_irq_setup(win); + + return rc; +} + +static int vas_deallocate_window(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + struct vas_cop_feat_caps *caps; + int rc = 0; + + if (!vwin) + return -EINVAL; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + + /* Should not happen */ + if (win->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Window (%u): Invalid window type %u\n", + vwin->winid, win->win_type); + return -EINVAL; + } + + caps = &vascaps[win->win_type].caps; + mutex_lock(&vas_pseries_mutex); + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + + list_del(&win->win_list); + atomic_dec(&caps->used_lpar_creds); + mutex_unlock(&vas_pseries_mutex); + + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); + + kfree(win); + return 0; +} + +static const struct vas_user_win_ops vops_pseries = { + .open_win = vas_allocate_window, /* Open and configure window */ + .paste_addr = vas_paste_address, /* To do copy/paste */ + .close_win = vas_deallocate_window, /* Close window */ +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc; + + if (!copypaste_feat) + return -ENOTSUPP; + + rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); + + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_api_pseries); + +void vas_unregister_api_pseries(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); + +/* + * Get the specific capabilities based on the feature type. + * Right now supports GZIP default and GZIP QoS capabilities. + */ +static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, + struct hv_vas_cop_feat_caps *hv_caps) +{ + struct vas_cop_feat_caps *caps; + struct vas_caps *vcaps; + int rc = 0; + + vcaps = &vascaps[type]; + memset(vcaps, 0, sizeof(*vcaps)); + INIT_LIST_HEAD(&vcaps->list); + + caps = &vcaps->caps; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + return rc; + + caps->user_mode = hv_caps->user_mode; + if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { + pr_err("User space COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + + caps->descriptor = be64_to_cpu(hv_caps->descriptor); + caps->win_type = hv_caps->win_type; + if (caps->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Unsupported window type %u\n", caps->win_type); + return -EINVAL; + } + caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); + caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); + atomic_set(&caps->target_lpar_creds, + be16_to_cpu(hv_caps->target_lpar_creds)); + if (feat == VAS_GZIP_DEF_FEAT) { + caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); + + if (caps->max_win_creds < DEF_WIN_CREDS) { + pr_err("Window creds(%u) > max allowed window creds(%u)\n", + DEF_WIN_CREDS, caps->max_win_creds); + return -EINVAL; + } + } + + copypaste_feat = true; + + return 0; +} + +static int __init pseries_vas_init(void) +{ + struct hv_vas_cop_feat_caps *hv_cop_caps; + struct hv_vas_all_caps *hv_caps; + int rc; + + /* + * Linux supports user space COPY/PASTE only with Radix + */ + if (!radix_enabled()) { + pr_err("API is supported only with radix page tables\n"); + return -ENOTSUPP; + } + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + /* + * Get VAS overall capabilities by passing 0 to feature type. + */ + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); + caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); + + hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); + if (!hv_cop_caps) { + rc = -ENOMEM; + goto out; + } + /* + * QOS capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, + VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + + if (rc) + goto out_cop; + } + /* + * Default capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, + VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); + if (rc) + goto out_cop; + } + + pr_info("GZIP feature is available\n"); + +out_cop: + kfree(hv_cop_caps); +out: + kfree(hv_caps); + return rc; +} +machine_device_initcall(pseries, pseries_vas_init); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h new file mode 100644 index 000000000000..4ecb3fcabd10 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2020-21 IBM Corp. + */ + +#ifndef _VAS_H +#define _VAS_H +#include <asm/vas.h> +#include <linux/mutex.h> +#include <linux/stringify.h> + +/* + * VAS window modify flags + */ +#define VAS_MOD_WIN_CLOSE PPC_BIT(0) +#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1) +#define VAS_MOD_WIN_DR PPC_BIT(3) +#define VAS_MOD_WIN_PR PPC_BIT(4) +#define VAS_MOD_WIN_SF PPC_BIT(5) +#define VAS_MOD_WIN_TA PPC_BIT(6) +#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \ + VAS_MOD_WIN_PR | VAS_MOD_WIN_SF) + +#define VAS_WIN_ACTIVE 0x0 +#define VAS_WIN_CLOSED 0x1 +#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */ +/* Process of being modified, deallocated, or quiesced */ +#define VAS_WIN_MOD_IN_PROCESS 0x3 + +#define VAS_COPY_PASTE_USER_MODE 0x00000001 +#define VAS_COP_OP_USER_MODE 0x00000010 + +/* + * Co-processor feature - GZIP QoS windows or GZIP default windows + */ +enum vas_cop_feat_type { + VAS_GZIP_QOS_FEAT_TYPE, + VAS_GZIP_DEF_FEAT_TYPE, + VAS_MAX_FEAT_TYPE, +}; + +/* + * Use to get feature specific capabilities from the + * hypervisor. + */ +struct hv_vas_cop_feat_caps { + __be64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; + __be16 max_lpar_creds; + __be16 max_win_creds; + union { + __be16 reserved; + __be16 def_lpar_creds; /* Used for default capabilities */ + }; + __be16 target_lpar_creds; +} __packed __aligned(0x1000); + +/* + * Feature specific (QoS or default) capabilities. + */ +struct vas_cop_feat_caps { + u64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; /* User mode copy/paste or COP HCALL */ + u16 max_lpar_creds; /* Max credits available in LPAR */ + /* Max credits can be assigned per window */ + u16 max_win_creds; + union { + u16 reserved; /* Used for QoS credit type */ + u16 def_lpar_creds; /* Used for default credit type */ + }; + /* Total LPAR available credits. Can be different from max LPAR */ + /* credits due to DLPAR operation */ + atomic_t target_lpar_creds; + atomic_t used_lpar_creds; /* Used credits so far */ + u16 avail_lpar_creds; /* Remaining available credits */ +}; + +/* + * Feature (QoS or Default) specific to store capabilities and + * the list of open windows. + */ +struct vas_caps { + struct vas_cop_feat_caps caps; + struct list_head list; /* List of open windows */ +}; + +/* + * To get window information from the hypervisor. + */ +struct hv_vas_win_lpar { + __be16 version; + u8 win_type; + u8 status; + __be16 credits; /* No of credits assigned to this window */ + __be16 reserved; + __be32 pid; /* LPAR Process ID */ + __be32 tid; /* LPAR Thread ID */ + __be64 win_addr; /* Paste address */ + __be32 interrupt; /* Interrupt when NX request completes */ + __be32 fault; /* Interrupt when NX sees fault */ + /* Associativity Domain Identifiers as returned in */ + /* H_HOME_NODE_ASSOCIATIVITY */ + __be64 domain[6]; + __be64 win_util; /* Number of bytes processed */ +} __packed __aligned(0x1000); + +struct pseries_vas_window { + struct vas_window vas_win; + u64 win_addr; /* Physical paste address */ + u8 win_type; /* QoS or Default window */ + u32 complete_irq; /* Completion interrupt */ + u32 fault_irq; /* Fault interrupt */ + u64 domain[6]; /* Associativity domain Ids */ + /* this window is allocated */ + u64 util; + + /* List of windows opened which is used for LPM */ + struct list_head win_list; + u64 flags; + char *name; + int fault_virq; +}; +#endif /* _VAS_H */ diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index b2797cfe4e2b..e00f3725ec96 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -22,6 +22,7 @@ #include <linux/mm.h> #include <linux/dma-map-ops.h> #include <linux/kobject.h> +#include <linux/kexec.h> #include <asm/iommu.h> #include <asm/dma.h> @@ -1261,7 +1262,6 @@ static int vio_bus_remove(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); struct device *devptr; - int ret = 1; /* * Hold a reference to the device after the remove function is called @@ -1270,13 +1270,27 @@ static int vio_bus_remove(struct device *dev) devptr = get_device(dev); if (viodrv->remove) - ret = viodrv->remove(viodev); + viodrv->remove(viodev); - if (!ret && firmware_has_feature(FW_FEATURE_CMO)) + if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_bus_remove(viodev); put_device(devptr); - return ret; + return 0; +} + +static void vio_bus_shutdown(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv; + + if (dev->driver) { + viodrv = to_vio_driver(dev->driver); + if (viodrv->shutdown) + viodrv->shutdown(viodev); + else if (kexec_in_progress) + vio_bus_remove(dev); + } } /** @@ -1286,6 +1300,10 @@ static int vio_bus_remove(struct device *dev) int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, const char *mod_name) { + // vio_bus_type is only initialised for pseries + if (!machine_is(pseries)) + return -ENODEV; + pr_debug("%s: driver %s registering\n", __func__, viodrv->name); /* fill in 'struct driver' fields */ @@ -1614,6 +1632,7 @@ struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, + .shutdown = vio_bus_shutdown, }; /** |