From 4f916593be9da38c5cf0d3a5c386b57beb70f422 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 26 Aug 2019 14:55:20 +1000 Subject: KVM: PPC: Book3S: Fix incorrect guest-to-user-translation error handling H_PUT_TCE_INDIRECT handlers receive a page with up to 512 TCEs from a guest. Although we verify correctness of TCEs before we do anything with the existing tables, there is a small window when a check in kvmppc_tce_validate might pass and right after that the guest alters the page with TCEs which can cause early exit from the handler and leave srcu_read_lock(&vcpu->kvm->srcu) (virtual mode) or lock_rmap(rmap) (real mode) locked. This fixes the bug by jumping to the common exit code with an appropriate unlock. Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190826045520.92153-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_64_vio.c | 6 ++++-- arch/powerpc/kvm/book3s_64_vio_hv.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index e99a14798ab0..c4b606fe73eb 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -660,8 +660,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, } tce = be64_to_cpu(tce); - if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) - return H_PARAMETER; + if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { + ret = H_PARAMETER; + goto unlock_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index f50bbeedfc66..b4f20f13b860 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -556,8 +556,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) - return H_PARAMETER; + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + ret = H_PARAMETER; + goto unlock_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, -- cgit v1.2.3-59-g8ed1b From 35872480da47ec714fd9c4f2f3d2d83daf304851 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Aug 2019 18:52:48 +1000 Subject: powerpc/powernv/ioda: Split out TCE invalidation from TCE updates At the moment updates in a TCE table are made by iommu_table_ops::exchange which update one TCE and invalidates an entry in the PHB/NPU TCE cache via set of registers called "TCE Kill" (hence the naming). Writing a TCE is a simple xchg() but invalidating the TCE cache is a relatively expensive OPAL call. Mapping a 100GB guest with PCI+NPU passed through devices takes about 20s. Thankfully we can do better. Since such big mappings happen at the boot time and when memory is plugged/onlined (i.e. not often), these requests come in 512 pages so we call call OPAL 512 times less which brings 20s from the above to less than 10s. Also, since TCE caches can be flushed entirely, calling OPAL for 512 TCEs helps skiboot [1] to decide whether to flush the entire cache or not. This implements 2 new iommu_table_ops callbacks: - xchg_no_kill() to update a single TCE with no TCE invalidation; - tce_kill() to invalidate multiple TCEs. This uses the same xchg_no_kill() callback for IODA1/2. This implements 2 new wrappers on top of the new callbacks similar to the existing iommu_tce_xchg(). This does not use the new callbacks yet, the next patches will; so this should not cause any behavioral change. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190829085252.72370-2-aik@ozlabs.ru --- arch/powerpc/include/asm/iommu.h | 17 +++++++++++++++++ arch/powerpc/kernel/iommu.c | 27 +++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 12 ++++++++++++ 3 files changed, 56 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 18d342b815e4..babddede9245 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -58,6 +58,17 @@ struct iommu_table_ops { unsigned long *hpa, enum dma_data_direction *direction); + int (*xchg_no_kill)(struct iommu_table *tbl, + long index, + unsigned long *hpa, + enum dma_data_direction *direction, + bool realmode); + + void (*tce_kill)(struct iommu_table *tbl, + unsigned long index, + unsigned long pages, + bool realmode); + __be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc); #endif void (*clear)(struct iommu_table *tbl, @@ -206,6 +217,12 @@ extern void iommu_del_device(struct device *dev); extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction); +extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, + struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction); +extern void iommu_tce_kill(struct iommu_table *tbl, + unsigned long entry, unsigned long pages); #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0a67ce9f827e..145f29cf7e4c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1005,6 +1005,33 @@ long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_xchg); +extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, + struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) +{ + long ret; + unsigned long size = 0; + + ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false); + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL)) && + !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift, + &size)) + SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT)); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill); + +void iommu_tce_kill(struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + if (tbl->it_ops->tce_kill) + tbl->it_ops->tce_kill(tbl, entry, pages, false); +} +EXPORT_SYMBOL_GPL(iommu_tce_kill); + int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d8080558d020..911f96abed89 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1939,6 +1939,14 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, } #ifdef CONFIG_IOMMU_API +/* Common for IODA1 and IODA2 */ +static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction, + bool realmode) +{ + return pnv_tce_xchg(tbl, index, hpa, direction, !realmode); +} + static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { @@ -1975,6 +1983,8 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda1_tce_xchg, .exchange_rm = pnv_ioda1_tce_xchg_rm, + .xchg_no_kill = pnv_ioda_tce_xchg_no_kill, + .tce_kill = pnv_pci_p7ioc_tce_invalidate, .useraddrptr = pnv_tce_useraddrptr, #endif .clear = pnv_ioda1_tce_free, @@ -2140,6 +2150,8 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda2_tce_xchg, .exchange_rm = pnv_ioda2_tce_xchg_rm, + .xchg_no_kill = pnv_ioda_tce_xchg_no_kill, + .tce_kill = pnv_pci_ioda2_tce_invalidate, .useraddrptr = pnv_tce_useraddrptr, #endif .clear = pnv_ioda2_tce_free, -- cgit v1.2.3-59-g8ed1b From 01b7d128b5a7f0d09626c090093ff44155f347c9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Aug 2019 18:52:49 +1000 Subject: KVM: PPC: Book3S: Invalidate multiple TCEs at once Invalidating a TCE cache entry for each updated TCE is quite expensive. This makes use of the new iommu_table_ops::xchg_no_kill()/tce_kill() callbacks to bring down the time spent in mapping a huge guest DMA window; roughly 20s to 10s for each guest's 100GB of DMA space. Signed-off-by: Alexey Kardashevskiy Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190829085252.72370-3-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_64_vio.c | 29 +++++++++++++++++++--------- arch/powerpc/kvm/book3s_64_vio_hv.c | 38 +++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index c4b606fe73eb..5834db0a54c6 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -416,7 +416,7 @@ static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, unsigned long hpa = 0; enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg(mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir); } static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, @@ -447,7 +447,8 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, unsigned long hpa = 0; long ret; - if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir))) + if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, + &dir))) return H_TOO_HARD; if (dir == DMA_NONE) @@ -455,7 +456,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret != H_SUCCESS) - iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -501,7 +502,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (mm_iommu_mapped_inc(mem)) return H_TOO_HARD; - ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir); if (WARN_ON_ONCE(ret)) { mm_iommu_mapped_dec(mem); return H_TOO_HARD; @@ -579,6 +580,8 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); + iommu_tce_kill(stit->tbl, entry, 1); + if (ret != H_SUCCESS) { kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); goto unlock_exit; @@ -656,13 +659,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ if (get_user(tce, tces + i)) { ret = H_TOO_HARD; - goto unlock_exit; + goto invalidate_exit; } tce = be64_to_cpu(tce); if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; - goto unlock_exit; + goto invalidate_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -673,13 +676,17 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) { kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); - goto unlock_exit; + goto invalidate_exit; } } kvmppc_tce_put(stt, entry + i, tce); } +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill(stit->tbl, entry, npages); + unlock_exit: srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -718,7 +725,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - return ret; + goto invalidate_exit; WARN_ON_ONCE(1); kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); @@ -728,6 +735,10 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); - return H_SUCCESS; +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages); + + return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index b4f20f13b860..ab6eeb8e753e 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -218,13 +218,14 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, return H_SUCCESS; } -static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, +static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm, + struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction) { long ret; - ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, true); if (!ret && ((*direction == DMA_FROM_DEVICE) || (*direction == DMA_BIDIRECTIONAL))) { @@ -240,13 +241,20 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, return ret; } +extern void iommu_tce_kill_rm(struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + if (tbl->it_ops->tce_kill) + tbl->it_ops->tce_kill(tbl, entry, pages, true); +} + static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry) { unsigned long hpa = 0; enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -278,7 +286,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, unsigned long hpa = 0; long ret; - if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir)) + if (iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir)) /* * real mode xchg can fail if struct page crosses * a page boundary @@ -290,7 +298,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret) - iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -336,7 +344,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) return H_TOO_HARD; - ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); if (ret) { mm_iommu_mapped_dec(mem); /* @@ -417,6 +425,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); + iommu_tce_kill_rm(stit->tbl, entry, 1); + if (ret != H_SUCCESS) { kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); return ret; @@ -558,7 +568,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ua = 0; if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { ret = H_PARAMETER; - goto unlock_exit; + goto invalidate_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -569,13 +579,17 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) { kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); - goto unlock_exit; + goto invalidate_exit; } } kvmppc_rm_tce_put(stt, entry + i, tce); } +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill_rm(stit->tbl, entry, npages); + unlock_exit: if (rmap) unlock_rmap(rmap); @@ -618,7 +632,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - return ret; + goto invalidate_exit; WARN_ON_ONCE_RM(1); kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); @@ -628,7 +642,11 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); - return H_SUCCESS; +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages); + + return ret; } /* This can be called in either virtual mode or real mode */ -- cgit v1.2.3-59-g8ed1b From 021b7868113cdca56297f2356d0f6fe89985c285 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Aug 2019 18:52:51 +1000 Subject: powerpc/pseries/iommu: Switch to xchg_no_kill This is the last implementation of iommu_table_ops::exchange() which we are about to remove. This implements xchg_no_kill() for pseries. Since it is paravirtual platform, the hypervisor does TCE invalidations and we do not have to deal with it here, hence no tce_kill() hook. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190829085252.72370-5-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 889dc2e44b89..a22ef9c7de2e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -621,7 +621,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) #ifdef CONFIG_IOMMU_API static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned - long *tce, enum dma_data_direction *direction) + long *tce, enum dma_data_direction *direction, + bool realmode) { long rc; unsigned long ioba = (unsigned long) index << tbl->it_page_shift; @@ -649,7 +650,7 @@ static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned struct iommu_table_ops iommu_table_lpar_multi_ops = { .set = tce_buildmulti_pSeriesLP, #ifdef CONFIG_IOMMU_API - .exchange = tce_exchange_pseries, + .xchg_no_kill = tce_exchange_pseries, #endif .clear = tce_freemulti_pSeriesLP, .get = tce_get_pSeriesLP -- cgit v1.2.3-59-g8ed1b From a102f139aac54689eeb05883952742ae780159f3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 29 Aug 2019 18:52:52 +1000 Subject: powerpc/powernv/ioda: Remove obsolete iommu_table_ops::exchange callbacks As now we have xchg_no_kill/tce_kill, these are not used anymore so remove them. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190829085252.72370-6-aik@ozlabs.ru --- arch/powerpc/include/asm/iommu.h | 10 ------- arch/powerpc/kernel/iommu.c | 26 +--------------- arch/powerpc/platforms/powernv/pci-ioda.c | 50 ------------------------------- 3 files changed, 1 insertion(+), 85 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index babddede9245..9ed0206cd98c 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -48,16 +48,6 @@ struct iommu_table_ops { * returns old TCE and DMA direction mask. * @tce is a physical address. */ - int (*exchange)(struct iommu_table *tbl, - long index, - unsigned long *hpa, - enum dma_data_direction *direction); - /* Real mode */ - int (*exchange_rm)(struct iommu_table *tbl, - long index, - unsigned long *hpa, - enum dma_data_direction *direction); - int (*xchg_no_kill)(struct iommu_table *tbl, long index, unsigned long *hpa, diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 145f29cf7e4c..bf803000e4b3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -981,30 +981,6 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) } EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); -long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl, - unsigned long entry, unsigned long *hpa, - enum dma_data_direction *direction) -{ - long ret; - unsigned long size = 0; - - ret = tbl->it_ops->exchange(tbl, entry, hpa, direction); - - if (!ret && ((*direction == DMA_FROM_DEVICE) || - (*direction == DMA_BIDIRECTIONAL)) && - !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift, - &size)) - SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT)); - - /* if (unlikely(ret)) - pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << tbl->it_page_shift, - hwaddr, ret); */ - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_tce_xchg); - extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, @@ -1044,7 +1020,7 @@ int iommu_take_ownership(struct iommu_table *tbl) * requires exchange() callback defined so if it is not * implemented, we disallow taking ownership over the table. */ - if (!tbl->it_ops->exchange) + if (!tbl->it_ops->xchg_no_kill) return -EINVAL; spin_lock_irqsave(&tbl->large_pool.lock, flags); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 911f96abed89..22a51aa75796 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1946,28 +1946,6 @@ static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index, { return pnv_tce_xchg(tbl, index, hpa, direction, !realmode); } - -static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction) -{ - long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); - - if (!ret) - pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false); - - return ret; -} - -static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction) -{ - long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); - - if (!ret) - pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true); - - return ret; -} #endif static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, @@ -1981,8 +1959,6 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, static struct iommu_table_ops pnv_ioda1_iommu_ops = { .set = pnv_ioda1_tce_build, #ifdef CONFIG_IOMMU_API - .exchange = pnv_ioda1_tce_xchg, - .exchange_rm = pnv_ioda1_tce_xchg_rm, .xchg_no_kill = pnv_ioda_tce_xchg_no_kill, .tce_kill = pnv_pci_p7ioc_tce_invalidate, .useraddrptr = pnv_tce_useraddrptr, @@ -2113,30 +2089,6 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, return ret; } -#ifdef CONFIG_IOMMU_API -static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction) -{ - long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); - - if (!ret) - pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); - - return ret; -} - -static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction) -{ - long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); - - if (!ret) - pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true); - - return ret; -} -#endif - static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, long npages) { @@ -2148,8 +2100,6 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, #ifdef CONFIG_IOMMU_API - .exchange = pnv_ioda2_tce_xchg, - .exchange_rm = pnv_ioda2_tce_xchg_rm, .xchg_no_kill = pnv_ioda_tce_xchg_no_kill, .tce_kill = pnv_pci_ioda2_tce_invalidate, .useraddrptr = pnv_tce_useraddrptr, -- cgit v1.2.3-59-g8ed1b From 70ed86f4de5bd74dd2d884dcd2f3275c4cfe665f Mon Sep 17 00:00:00 2001 From: Claudio Carvalho Date: Thu, 29 Aug 2019 12:50:20 -0300 Subject: powerpc: Add PowerPC Capabilities ELF note Add the PowerPC name and the PPC_ELFNOTE_CAPABILITIES type in the kernel binary ELF note. This type is a bitmap that can be used to advertise kernel capabilities to userland. This patch also defines PPCCAP_ULTRAVISOR_BIT as being the bit zero. Suggested-by: Paul Mackerras Signed-off-by: Claudio Carvalho [ maxiwell: Define the 'PowerPC' type in the elfnote.h ] Signed-off-by: Maxiwell S. Garcia Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190829155021.2915-2-maxiwell@linux.ibm.com --- arch/powerpc/include/asm/elfnote.h | 24 +++++++++++++++++++++++ arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/note.S | 40 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/elfnote.h create mode 100644 arch/powerpc/kernel/note.S (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/elfnote.h b/arch/powerpc/include/asm/elfnote.h new file mode 100644 index 000000000000..a201b6e9ae44 --- /dev/null +++ b/arch/powerpc/include/asm/elfnote.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PowerPC ELF notes. + * + * Copyright 2019, IBM Corporation + */ + +#ifndef __ASM_POWERPC_ELFNOTE_H__ +#define __ASM_POWERPC_ELFNOTE_H__ + +/* + * These note types should live in a SHT_NOTE segment and have + * "PowerPC" in the name field. + */ + +/* + * The capabilities supported/required by this kernel (bitmap). + * + * This type uses a bitmap as "desc" field. Each bit is described + * in arch/powerpc/kernel/note.S + */ +#define PPC_ELFNOTE_CAPABILITIES 1 + +#endif /* __ASM_POWERPC_ELFNOTE_H__ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ea0c69236789..d4eb50de13b1 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -53,7 +53,7 @@ obj-y := cputable.o ptrace.o syscalls.o \ dma-common.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ - paca.o nvram_64.o firmware.o + paca.o nvram_64.o firmware.o note.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S new file mode 100644 index 000000000000..bcdad15395dd --- /dev/null +++ b/arch/powerpc/kernel/note.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PowerPC ELF notes. + * + * Copyright 2019, IBM Corporation + */ + +#include +#include + +/* + * Ultravisor-capable bit (PowerNV only). + * + * Bit 0 indicates that the powerpc kernel binary knows how to run in an + * ultravisor-enabled system. + * + * In an ultravisor-enabled system, some machine resources are now controlled + * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up + * being run on a machine with ultravisor, the kernel will probably crash + * trying to access ultravisor resources. For instance, it may crash in early + * boot trying to set the partition table entry 0. + * + * In an ultravisor-enabled system, a bootloader could warn the user or prevent + * the kernel from being run if the PowerPC ultravisor capability doesn't exist + * or the Ultravisor-capable bit is not set. + */ +#ifdef CONFIG_PPC_POWERNV +#define PPCCAP_ULTRAVISOR_BIT (1 << 0) +#else +#define PPCCAP_ULTRAVISOR_BIT 0 +#endif + +/* + * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that + * can be used to advertise kernel capabilities to userland. + */ +#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT) + +ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES, + .long PPC_CAPABILITIES_BITMAP) -- cgit v1.2.3-59-g8ed1b From a49dddbdb0cca1d00fc9251e543a0aac09a6a65b Mon Sep 17 00:00:00 2001 From: Claudio Carvalho Date: Thu, 22 Aug 2019 00:48:33 -0300 Subject: powerpc/kernel: Add ucall_norets() ultravisor call handler The ultracalls (ucalls for short) allow the Secure Virtual Machines (SVM)s and hypervisor to request services from the ultravisor such as accessing a register or memory region that can only be accessed when running in ultravisor-privileged mode. This patch adds the ucall_norets() ultravisor call handler. The specific service needed from an ucall is specified in register R3 (the first parameter to the ucall). Other parameters to the ucall, if any, are specified in registers R4 through R12. Return value of all ucalls is in register R3. Other output values from the ucall, if any, are returned in registers R4 through R12. Each ucall returns specific error codes, applicable in the context of the ucall. However, like with the PowerPC Architecture Platform Reference (PAPR), if no specific error code is defined for a particular situation, then the ucall will fallback to an erroneous parameter-position based code. i.e U_PARAMETER, U_P2, U_P3 etc depending on the ucall parameter that may have caused the error. Every host kernel (powernv) needs to be able to do ucalls in case it ends up being run in a machine with ultravisor enabled. Otherwise, the kernel may crash early in boot trying to access ultravisor resources, for instance, trying to set the partition table entry 0. Secure guests also need to be able to do ucalls and its kernel may not have CONFIG_PPC_POWERNV=y. For that reason, the ucall.S file is placed under arch/powerpc/kernel. If ultravisor is not enabled, the ucalls will be redirected to the hypervisor which must handle/fail the call. Thanks to inputs from Ram Pai and Michael Anderson. Signed-off-by: Claudio Carvalho Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190822034838.27876-3-cclaudio@linux.ibm.com --- arch/powerpc/include/asm/asm-prototypes.h | 11 +++++++++++ arch/powerpc/include/asm/ultravisor-api.h | 23 +++++++++++++++++++++++ arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/ucall.S | 14 ++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 arch/powerpc/include/asm/ultravisor-api.h create mode 100644 arch/powerpc/kernel/ucall.S (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index ec1c97a8e8cb..e698f48cbc6d 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -34,6 +35,16 @@ extern struct static_key hcall_tracepoint_key; void __trace_hcall_entry(unsigned long opcode, unsigned long *args); void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); +/* Ultravisor */ +#ifdef CONFIG_PPC_POWERNV +long ucall_norets(unsigned long opcode, ...); +#else +static inline long ucall_norets(unsigned long opcode, ...) +{ + return U_NOT_AVAILABLE; +} +#endif + /* OPAL */ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7, diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h new file mode 100644 index 000000000000..88ffa78f9d61 --- /dev/null +++ b/arch/powerpc/include/asm/ultravisor-api.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Ultravisor API. + * + * Copyright 2019, IBM Corporation. + * + */ +#ifndef _ASM_POWERPC_ULTRAVISOR_API_H +#define _ASM_POWERPC_ULTRAVISOR_API_H + +#include + +/* Return codes */ +#define U_FUNCTION H_FUNCTION +#define U_NOT_AVAILABLE H_NOT_AVAILABLE +#define U_P2 H_P2 +#define U_P3 H_P3 +#define U_P4 H_P4 +#define U_P5 H_P5 +#define U_PARAMETER H_PARAMETER +#define U_SUCCESS H_SUCCESS + +#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index d4eb50de13b1..934e64b28894 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -156,6 +156,7 @@ endif obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o +obj-$(CONFIG_PPC_POWERNV) += ucall.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S new file mode 100644 index 000000000000..07296bc39166 --- /dev/null +++ b/arch/powerpc/kernel/ucall.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generic code to perform an ultravisor call. + * + * Copyright 2019, IBM Corporation. + * + */ +#include +#include + +_GLOBAL(ucall_norets) +EXPORT_SYMBOL_GPL(ucall_norets) + sc 2 /* Invoke the ultravisor */ + blr /* Return r3 = status */ -- cgit v1.2.3-59-g8ed1b From bb04ffe85eebebd64d5e673a9434d968e80f3aa1 Mon Sep 17 00:00:00 2001 From: Claudio Carvalho Date: Thu, 22 Aug 2019 00:48:34 -0300 Subject: powerpc/powernv: Introduce FW_FEATURE_ULTRAVISOR In PEF enabled systems, some of the resources which were previously hypervisor privileged are now ultravisor privileged and controlled by the ultravisor firmware. This adds FW_FEATURE_ULTRAVISOR to indicate if PEF is enabled. The host kernel can use FW_FEATURE_ULTRAVISOR, for instance, to skip accessing resources (e.g. PTCR and LDBAR) in case PEF is enabled. Signed-off-by: Claudio Carvalho [ andmike: Device node name to "ibm,ultravisor" ] Signed-off-by: Michael Anderson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190822034838.27876-4-cclaudio@linux.ibm.com --- arch/powerpc/include/asm/firmware.h | 5 +++-- arch/powerpc/include/asm/ultravisor.h | 14 ++++++++++++++ arch/powerpc/kernel/prom.c | 4 ++++ arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/ultravisor.c | 24 ++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/include/asm/ultravisor.h create mode 100644 arch/powerpc/platforms/powernv/ultravisor.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index faeca8b76c8c..b3e214a97f3a 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -50,6 +50,7 @@ #define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) #define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000) #define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000) +#define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000) #ifndef __ASSEMBLY__ @@ -68,9 +69,9 @@ enum { FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | - FW_FEATURE_PAPR_SCM, + FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR, FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, + FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h new file mode 100644 index 000000000000..dc6e1ea198f2 --- /dev/null +++ b/arch/powerpc/include/asm/ultravisor.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Ultravisor definitions + * + * Copyright 2019, IBM Corporation. + * + */ +#ifndef _ASM_POWERPC_ULTRAVISOR_H +#define _ASM_POWERPC_ULTRAVISOR_H + +int early_init_dt_scan_ultravisor(unsigned long node, const char *uname, + int depth, void *data); + +#endif /* _ASM_POWERPC_ULTRAVISOR_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7159e791a70d..5828f1c81dc9 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -55,6 +55,7 @@ #include #include #include +#include #include @@ -702,6 +703,9 @@ void __init early_init_devtree(void *params) #ifdef CONFIG_PPC_POWERNV /* Some machines might need OPAL info for debugging, grab it now. */ of_scan_flat_dt(early_init_dt_scan_opal, NULL); + + /* Scan tree for ultravisor feature */ + of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL); #endif #ifdef CONFIG_FA_DUMP diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index da2e99efbd04..2c27c8ac00c8 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -4,6 +4,7 @@ obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o +obj-y += ultravisor.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c new file mode 100644 index 000000000000..02ac57b4bded --- /dev/null +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ultravisor high level interfaces + * + * Copyright 2019, IBM Corporation. + * + */ +#include +#include +#include + +#include +#include + +int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname, + int depth, void *data) +{ + if (!of_flat_dt_is_compatible(node, "ibm,ultravisor")) + return 0; + + powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR; + pr_debug("Ultravisor detected!\n"); + return 1; +} -- cgit v1.2.3-59-g8ed1b From 139a1d2842ec181cf017502a46bb8d947682a960 Mon Sep 17 00:00:00 2001 From: Michael Anderson Date: Thu, 22 Aug 2019 00:48:35 -0300 Subject: powerpc/mm: Use UV_WRITE_PATE ucall to register a PATE When Ultravisor (UV) is enabled, the partition table is stored in secure memory and can only be accessed via the UV. The Hypervisor (HV) however maintains a copy of the partition table in normal memory to allow Nest MMU translations to occur (for normal VMs). The HV copy includes partition table entries (PATE)s for secure VMs which would currently be unused (Nest MMU translations cannot access secure memory) but they would be needed as we add functionality. This patch adds the UV_WRITE_PATE ucall which is used to update the PATE for a VM (both normal and secure) when Ultravisor is enabled. Signed-off-by: Michael Anderson Signed-off-by: Madhavan Srinivasan Signed-off-by: Ram Pai [ cclaudio: Write the PATE in HV's table before doing that in UV's ] Signed-off-by: Claudio Carvalho Reviewed-by: Ryan Grimm Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190822034838.27876-5-cclaudio@linux.ibm.com --- arch/powerpc/include/asm/ultravisor-api.h | 5 ++++ arch/powerpc/include/asm/ultravisor.h | 8 +++++ arch/powerpc/mm/book3s64/pgtable.c | 50 +++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h index 88ffa78f9d61..8cd49abff4f3 100644 --- a/arch/powerpc/include/asm/ultravisor-api.h +++ b/arch/powerpc/include/asm/ultravisor-api.h @@ -11,6 +11,7 @@ #include /* Return codes */ +#define U_BUSY H_BUSY #define U_FUNCTION H_FUNCTION #define U_NOT_AVAILABLE H_NOT_AVAILABLE #define U_P2 H_P2 @@ -18,6 +19,10 @@ #define U_P4 H_P4 #define U_P5 H_P5 #define U_PARAMETER H_PARAMETER +#define U_PERMISSION H_PERMISSION #define U_SUCCESS H_SUCCESS +/* opcodes */ +#define UV_WRITE_PATE 0xF104 + #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */ diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h index dc6e1ea198f2..6fe1f365dec8 100644 --- a/arch/powerpc/include/asm/ultravisor.h +++ b/arch/powerpc/include/asm/ultravisor.h @@ -8,7 +8,15 @@ #ifndef _ASM_POWERPC_ULTRAVISOR_H #define _ASM_POWERPC_ULTRAVISOR_H +#include +#include + int early_init_dt_scan_ultravisor(unsigned long node, const char *uname, int depth, void *data); +static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1) +{ + return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1); +} + #endif /* _ASM_POWERPC_ULTRAVISOR_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 7d0e0d0d22c4..4173f6931009 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -209,21 +211,10 @@ void __init mmu_partition_table_init(void) powernv_set_nmmu_ptcr(ptcr); } -void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, - unsigned long dw1) +static void flush_partition(unsigned int lpid, bool radix) { - unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); - - partition_tb[lpid].patb0 = cpu_to_be64(dw0); - partition_tb[lpid].patb1 = cpu_to_be64(dw1); - - /* - * Global flush of TLBs and partition table caches for this lpid. - * The type of flush (hash or radix) depends on what the previous - * use of this partition ID was, not the new use. - */ asm volatile("ptesync" : : : "memory"); - if (old & PATB_HR) { + if (radix) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : @@ -237,6 +228,39 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } + +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1) +{ + unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); + + /* + * When ultravisor is enabled, the partition table is stored in secure + * memory and can only be accessed doing an ultravisor call. However, we + * maintain a copy of the partition table in normal memory to allow Nest + * MMU translations to occur (for normal VMs). + * + * Therefore, here we always update partition_tb, regardless of whether + * we are running under an ultravisor or not. + */ + partition_tb[lpid].patb0 = cpu_to_be64(dw0); + partition_tb[lpid].patb1 = cpu_to_be64(dw1); + + /* + * If ultravisor is enabled, we do an ultravisor call to register the + * partition table entry (PATE), which also do a global flush of TLBs + * and partition table caches for the lpid. Otherwise, just do the + * flush. The type of flush (hash or radix) depends on what the previous + * use of the partition ID was, not the new use. + */ + if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) { + uv_register_pate(lpid, dw0, dw1); + pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n", + dw0, dw1); + } else { + flush_partition(lpid, (old & PATB_HR)); + } +} EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); static pmd_t *get_pmd_from_cache(struct mm_struct *mm) -- cgit v1.2.3-59-g8ed1b From 5223134029a87db925ecc9449f9501bad391a52e Mon Sep 17 00:00:00 2001 From: Claudio Carvalho Date: Thu, 22 Aug 2019 00:48:36 -0300 Subject: powerpc/mm: Write to PTCR only if ultravisor disabled In ultravisor enabled systems, PTCR becomes ultravisor privileged only for writing and an attempt to write to it will cause a Hypervisor Emulation Assitance interrupt. This patch uses the set_ptcr_when_no_uv() function to restrict PTCR writing to only when ultravisor is disabled. Signed-off-by: Claudio Carvalho Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190822034838.27876-6-cclaudio@linux.ibm.com --- arch/powerpc/include/asm/ultravisor.h | 12 ++++++++++++ arch/powerpc/mm/book3s64/hash_utils.c | 5 +++-- arch/powerpc/mm/book3s64/pgtable.c | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 8 +++++--- 4 files changed, 21 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h index 6fe1f365dec8..d7aa97aa7834 100644 --- a/arch/powerpc/include/asm/ultravisor.h +++ b/arch/powerpc/include/asm/ultravisor.h @@ -10,10 +10,22 @@ #include #include +#include int early_init_dt_scan_ultravisor(unsigned long node, const char *uname, int depth, void *data); +/* + * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for + * writing and an attempt to write to it will cause a Hypervisor Emulation + * Assistance interrupt. + */ +static inline void set_ptcr_when_no_uv(u64 val) +{ + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + mtspr(SPRN_PTCR, val); +} + static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1) { return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index b8ad14bb1170..88aab920caca 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -1075,8 +1076,8 @@ void hash__early_init_mmu_secondary(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else - mtspr(SPRN_PTCR, - __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + set_ptcr_when_no_uv(__pa(partition_tb) | + (PATB_SIZE_SHIFT - 12)); } /* Initialize SLB */ slb_initialize(); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 4173f6931009..01a7570c10e0 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -207,7 +207,7 @@ void __init mmu_partition_table_init(void) * 64 K size. */ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); - mtspr(SPRN_PTCR, ptcr); + set_ptcr_when_no_uv(ptcr); powernv_set_nmmu_ptcr(ptcr); } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index b4ca9e95e678..5bc118b4a634 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -650,8 +651,9 @@ void radix__early_init_mmu_secondary(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); - mtspr(SPRN_PTCR, - __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + set_ptcr_when_no_uv(__pa(partition_tb) | + (PATB_SIZE_SHIFT - 12)); + radix_init_amor(); } @@ -667,7 +669,7 @@ void radix__mmu_cleanup_all(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) { lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT); - mtspr(SPRN_PTCR, 0); + set_ptcr_when_no_uv(0); powernv_set_nmmu_ptcr(0); radix__flush_tlb_all(); } -- cgit v1.2.3-59-g8ed1b From 512a5a6452b6d742b6f713184414d28cb6413080 Mon Sep 17 00:00:00 2001 From: Claudio Carvalho Date: Thu, 22 Aug 2019 00:48:37 -0300 Subject: powerpc/powernv: Access LDBAR only if ultravisor disabled LDBAR is a per-thread SPR populated and used by the thread-imc pmu driver to dump the data counter into memory. It contains memory along with few other configuration bits. LDBAR is populated and enabled only when any of the thread imc pmu events are monitored. In ultravisor enabled systems, LDBAR becomes ultravisor privileged and an attempt to write to it will cause a Hypervisor Emulation Assistance interrupt. In ultravisor enabled systems, the ultravisor is responsible to maintain the LDBAR (e.g. save and restore it). This restricts LDBAR access to only when ultravisor is disabled. Signed-off-by: Claudio Carvalho Reviewed-by: Ram Pai Reviewed-by: Ryan Grimm Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190822034838.27876-7-cclaudio@linux.ibm.com --- arch/powerpc/platforms/powernv/idle.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 09f49eed7fb8..78599bca66c2 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -675,7 +675,8 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) sprs.ptcr = mfspr(SPRN_PTCR); sprs.rpr = mfspr(SPRN_RPR); sprs.tscr = mfspr(SPRN_TSCR); - sprs.ldbar = mfspr(SPRN_LDBAR); + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + sprs.ldbar = mfspr(SPRN_LDBAR); sprs_saved = true; @@ -789,7 +790,8 @@ core_woken: mtspr(SPRN_MMCR0, sprs.mmcr0); mtspr(SPRN_MMCR1, sprs.mmcr1); mtspr(SPRN_MMCR2, sprs.mmcr2); - mtspr(SPRN_LDBAR, sprs.ldbar); + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + mtspr(SPRN_LDBAR, sprs.ldbar); mtspr(SPRN_SPRG3, local_paca->sprg_vdso); -- cgit v1.2.3-59-g8ed1b From 6c85b7bc637b64e681760f62c0eafba2f56745c6 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 22 Aug 2019 00:48:38 -0300 Subject: powerpc/kvm: Use UV_RETURN ucall to return to ultravisor When an SVM makes an hypercall or incurs some other exception, the Ultravisor usually forwards (a.k.a. reflects) the exceptions to the Hypervisor. After processing the exception, Hypervisor uses the UV_RETURN ultracall to return control back to the SVM. The expected register state on entry to this ultracall is: * Non-volatile registers are restored to their original values. * If returning from an hypercall, register R0 contains the return value (unlike other ultracalls) and, registers R4 through R12 contain any output values of the hypercall. * R3 contains the ultracall number, i.e UV_RETURN. * If returning with a synthesized interrupt, R2 contains the synthesized interrupt number. Thanks to input from Paul Mackerras, Ram Pai and Mike Anderson. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Claudio Carvalho Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190822034838.27876-8-cclaudio@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/ultravisor-api.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 39 ++++++++++++++++++++++++++----- 4 files changed, 36 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e6e5f59aaa97..4bb552d639b8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -283,6 +283,7 @@ struct kvm_arch { cpumask_t cpu_in_guest; u8 radix; u8 fwnmi_enabled; + u8 secure_guest; bool threads_indep; bool nested_enable; pgd_t *pgtable; diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h index 8cd49abff4f3..6a0f9c74f959 100644 --- a/arch/powerpc/include/asm/ultravisor-api.h +++ b/arch/powerpc/include/asm/ultravisor-api.h @@ -24,5 +24,6 @@ /* opcodes */ #define UV_WRITE_PATE 0xF104 +#define UV_RETURN 0xF11C #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4ccb6b3a7fbd..484f54dab247 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -506,6 +506,7 @@ int main(void) OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); OFFSET(KVM_RADIX, kvm, arch.radix); OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); + OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 07181d0dfcb7..9a05b0d932ef 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -29,6 +29,7 @@ #include #include #include +#include /* Sign-extend HDEC if not on POWER9 */ #define EXTEND_HDEC(reg) \ @@ -1085,16 +1086,10 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r5, VCPU_LR(r4) - ld r6, VCPU_CR(r4) mtlr r5 - mtcr r6 ld r1, VCPU_GPR(R1)(r4) - ld r2, VCPU_GPR(R2)(r4) - ld r3, VCPU_GPR(R3)(r4) ld r5, VCPU_GPR(R5)(r4) - ld r6, VCPU_GPR(R6)(r4) - ld r7, VCPU_GPR(R7)(r4) ld r8, VCPU_GPR(R8)(r4) ld r9, VCPU_GPR(R9)(r4) ld r10, VCPU_GPR(R10)(r4) @@ -1112,10 +1107,42 @@ BEGIN_FTR_SECTION mtspr SPRN_HDSISR, r0 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r6, VCPU_KVM(r4) + lbz r7, KVM_SECURE_GUEST(r6) + cmpdi r7, 0 + ld r6, VCPU_GPR(R6)(r4) + ld r7, VCPU_GPR(R7)(r4) + bne ret_to_ultra + + lwz r0, VCPU_CR(r4) + mtcr r0 + ld r0, VCPU_GPR(R0)(r4) + ld r2, VCPU_GPR(R2)(r4) + ld r3, VCPU_GPR(R3)(r4) ld r4, VCPU_GPR(R4)(r4) HRFI_TO_GUEST b . +/* + * Use UV_RETURN ultracall to return control back to the Ultravisor after + * processing an hypercall or interrupt that was forwarded (a.k.a. reflected) + * to the Hypervisor. + * + * All registers have already been loaded, except: + * R0 = hcall result + * R2 = SRR1, so UV can detect a synthesized interrupt (if any) + * R3 = UV_RETURN + */ +ret_to_ultra: + lwz r0, VCPU_CR(r4) + mtcr r0 + + ld r0, VCPU_GPR(R3)(r4) + mfspr r2, SPRN_SRR1 + li r3, 0 + ori r3, r3, UV_RETURN + ld r4, VCPU_GPR(R4)(r4) + sc 2 /* * Enter the guest on a P9 or later system where we have exactly -- cgit v1.2.3-59-g8ed1b From dea45ea7775240047f70e2631e468f6b65d09493 Mon Sep 17 00:00:00 2001 From: Claudio Carvalho Date: Wed, 28 Aug 2019 23:05:20 +1000 Subject: powerpc/powernv/opal-msglog: Refactor memcons code This patch refactors the code in opal-msglog that operates on the OPAL memory console in order to make it cleaner and also allow the reuse of the new memcons_* functions. Signed-off-by: Claudio Carvalho Signed-off-by: Michael Ellerman Tested-by: Claudio Carvalho Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190828130521.26764-1-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/opal-msglog.c | 57 +++++++++++++++++++--------- 1 file changed, 39 insertions(+), 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index dc51d03c6370..d26da19a611f 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -29,23 +29,23 @@ struct memcons { static struct memcons *opal_memcons = NULL; -ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) +ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count) { const char *conbuf; ssize_t ret; size_t first_read = 0; uint32_t out_pos, avail; - if (!opal_memcons) + if (!mc) return -ENODEV; - out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos)); + out_pos = be32_to_cpu(READ_ONCE(mc->out_pos)); /* Now we've read out_pos, put a barrier in before reading the new * data it points to in conbuf. */ smp_rmb(); - conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys)); + conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys)); /* When the buffer has wrapped, read from the out_pos marker to the end * of the buffer, and then read the remaining data as in the un-wrapped @@ -53,7 +53,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) if (out_pos & MEMCONS_OUT_POS_WRAP) { out_pos &= MEMCONS_OUT_POS_MASK; - avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos; + avail = be32_to_cpu(mc->obuf_size) - out_pos; ret = memory_read_from_buffer(to, count, &pos, conbuf + out_pos, avail); @@ -71,7 +71,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) } /* Sanity check. The firmware should not do this to us. */ - if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) { + if (out_pos > be32_to_cpu(mc->obuf_size)) { pr_err("OPAL: memory console corruption. Aborting read.\n"); return -EINVAL; } @@ -86,6 +86,11 @@ out: return ret; } +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) +{ + return memcons_copy(opal_memcons, to, pos, count); +} + static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) @@ -98,32 +103,48 @@ static struct bin_attribute opal_msglog_attr = { .read = opal_msglog_read }; -void __init opal_msglog_init(void) +struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name) { u64 mcaddr; struct memcons *mc; - if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) { - pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n"); - return; + if (of_property_read_u64(node, mc_prop_name, &mcaddr)) { + pr_warn("%s property not found, no message log\n", + mc_prop_name); + goto out_err; } mc = phys_to_virt(mcaddr); if (!mc) { - pr_warn("OPAL: memory console address is invalid\n"); - return; + pr_warn("memory console address is invalid\n"); + goto out_err; } if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) { - pr_warn("OPAL: memory console version is invalid\n"); - return; + pr_warn("memory console version is invalid\n"); + goto out_err; } - /* Report maximum size */ - opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) + - be32_to_cpu(mc->obuf_size); + return mc; + +out_err: + return NULL; +} + +u32 memcons_get_size(struct memcons *mc) +{ + return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size); +} + +void __init opal_msglog_init(void) +{ + opal_memcons = memcons_init(opal_node, "ibm,opal-memcons"); + if (!opal_memcons) { + pr_warn("OPAL: memcons failed to load from ibm,opal-memcons\n"); + return; + } - opal_memcons = mc; + opal_msglog_attr.size = memcons_get_size(opal_memcons); } void __init opal_msglog_sysfs_init(void) -- cgit v1.2.3-59-g8ed1b From 68e0aa8ec5cedec48dd5b11df84afc956c8f85be Mon Sep 17 00:00:00 2001 From: Claudio Carvalho Date: Wed, 28 Aug 2019 23:05:21 +1000 Subject: powerpc/powernv: Add ultravisor message log interface The ultravisor (UV) provides an in-memory console which follows the OPAL in-memory console structure. This patch extends the OPAL msglog code to initialize the UV memory console and provide the "/sys/firmware/ultravisor/msglog" interface for userspace to view the UV message log. Signed-off-by: Claudio Carvalho Signed-off-by: Michael Ellerman Tested-by: Claudio Carvalho Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190828130521.26764-2-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/powernv.h | 5 ++++ arch/powerpc/platforms/powernv/ultravisor.c | 45 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index fd4a1c5a6369..1aa51c4fa904 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -30,4 +30,9 @@ extern void opal_event_shutdown(void); bool cpu_core_split_required(void); +struct memcons; +ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); +u32 memcons_get_size(struct memcons *mc); +struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index 02ac57b4bded..e4a00ad06f9d 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -8,9 +8,15 @@ #include #include #include +#include #include #include +#include + +#include "powernv.h" + +static struct kobject *ultravisor_kobj; int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname, int depth, void *data) @@ -22,3 +28,42 @@ int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname, pr_debug("Ultravisor detected!\n"); return 1; } + +static struct memcons *uv_memcons; + +static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + return memcons_copy(uv_memcons, to, pos, count); +} + +static struct bin_attribute uv_msglog_attr = { + .attr = {.name = "msglog", .mode = 0400}, + .read = uv_msglog_read +}; + +static int __init uv_init(void) +{ + struct device_node *node; + + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + return 0; + + node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware"); + if (!node) + return -ENODEV; + + uv_memcons = memcons_init(node, "memcons"); + if (!uv_memcons) + return -ENOENT; + + uv_msglog_attr.size = memcons_get_size(uv_memcons); + + ultravisor_kobj = kobject_create_and_add("ultravisor", firmware_kobj); + if (!ultravisor_kobj) + return -ENOMEM; + + return sysfs_create_bin_file(ultravisor_kobj, &uv_msglog_attr); +} +machine_subsys_initcall(powernv, uv_init); -- cgit v1.2.3-59-g8ed1b