aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-05 09:45:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-05 09:45:46 -0700
commit9e85ae6af6e907975f68d82ff127073ec024cb05 (patch)
tree3d3349b03da858e53ef8f8dce467e4a691eabf88 /arch/s390/mm
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha (diff)
parents390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs (diff)
downloadlinux-dev-9e85ae6af6e907975f68d82ff127073ec024cb05.tar.xz
linux-dev-9e85ae6af6e907975f68d82ff127073ec024cb05.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky: "The first part of the s390 updates for 4.14: - Add machine type 0x3906 for IBM z14 - Add IBM z14 TLB flushing improvements for KVM guests - Exploit the TOD clock epoch extension to provide a continuous TOD clock afer 2042/09/17 - Add NIAI spinlock hints for IBM z14 - Rework the vmcp driver and use CMA for the respone buffer of z/VM CP commands - Drop some s390 specific asm headers and use the generic version - Add block discard for DASD-FBA devices under z/VM - Add average request times to DASD statistics - A few of those constify patches which seem to be in vogue right now - Cleanup and bug fixes" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (50 commits) s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs s390/dasd: Add discard support for FBA devices s390/zcrypt: make CPRBX const s390/uaccess: avoid mvcos jump label s390/mm: use generic mm_hooks s390/facilities: fix typo s390/vmcp: simplify vmcp_response_free() s390/topology: Remove the unused parent_node() macro s390/dasd: Change unsigned long long to unsigned long s390/smp: convert cpuhp_setup_state() return code to zero on success s390: fix 'novx' early parameter handling s390/dasd: add average request times to dasd statistics s390/scm: use common completion path s390/pci: log changes to uid checking s390/vmcp: simplify vmcp_ioctl() s390/vmcp: return -ENOTTY for unknown ioctl commands s390/vmcp: split vmcp header file and move to uapi s390/vmcp: make use of contiguous memory allocator s390/cpcmd,vmcp: avoid GFP_DMA allocations s390/vmcp: fix uaccess check and avoid undefined behavior ...
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/fault.c10
-rw-r--r--arch/s390/mm/gmap.c163
-rw-r--r--arch/s390/mm/init.c60
-rw-r--r--arch/s390/mm/page-states.c192
-rw-r--r--arch/s390/mm/pageattr.c5
-rw-r--r--arch/s390/mm/pgalloc.c12
-rw-r--r--arch/s390/mm/pgtable.c154
-rw-r--r--arch/s390/mm/vmem.c47
8 files changed, 476 insertions, 167 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14f25798b001..bdabb013537b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -135,7 +135,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
pr_alert("AS:%016lx ", asce);
switch (asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
- table = table + ((address >> 53) & 0x7ff);
+ table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R1:%016lx ", *table);
@@ -144,7 +144,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION2:
- table = table + ((address >> 42) & 0x7ff);
+ table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R2:%016lx ", *table);
@@ -153,7 +153,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION3:
- table = table + ((address >> 31) & 0x7ff);
+ table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R3:%016lx ", *table);
@@ -162,7 +162,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_SEGMENT:
- table = table + ((address >> 20) & 0x7ff);
+ table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("S:%016lx ", *table);
@@ -170,7 +170,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
goto out;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
}
- table = table + ((address >> 12) & 0xff);
+ table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("P:%016lx ", *table);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4fb3d3cdb370..9e1494e3d849 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -36,16 +36,16 @@ static struct gmap *gmap_alloc(unsigned long limit)
unsigned long *table;
unsigned long etype, atype;
- if (limit < (1UL << 31)) {
- limit = (1UL << 31) - 1;
+ if (limit < _REGION3_SIZE) {
+ limit = _REGION3_SIZE - 1;
atype = _ASCE_TYPE_SEGMENT;
etype = _SEGMENT_ENTRY_EMPTY;
- } else if (limit < (1UL << 42)) {
- limit = (1UL << 42) - 1;
+ } else if (limit < _REGION2_SIZE) {
+ limit = _REGION2_SIZE - 1;
atype = _ASCE_TYPE_REGION3;
etype = _REGION3_ENTRY_EMPTY;
- } else if (limit < (1UL << 53)) {
- limit = (1UL << 53) - 1;
+ } else if (limit < _REGION1_SIZE) {
+ limit = _REGION1_SIZE - 1;
atype = _ASCE_TYPE_REGION2;
etype = _REGION2_ENTRY_EMPTY;
} else {
@@ -65,7 +65,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
spin_lock_init(&gmap->guest_table_lock);
spin_lock_init(&gmap->shadow_lock);
atomic_set(&gmap->ref_count, 1);
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
goto out_free;
page->index = 0;
@@ -186,7 +186,7 @@ static void gmap_free(struct gmap *gmap)
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
@@ -306,7 +306,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
@@ -321,7 +321,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
}
spin_unlock(&gmap->guest_table_lock);
if (page)
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return 0;
}
@@ -546,30 +546,30 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
/* Create higher level tables in the gmap page table */
table = gmap->table;
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
- table += (gaddr >> 53) & 0x7ff;
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
- gaddr & 0xffe0000000000000UL))
+ gaddr & _REGION1_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
- table += (gaddr >> 42) & 0x7ff;
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
- gaddr & 0xfffffc0000000000UL))
+ gaddr & _REGION2_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
- table += (gaddr >> 31) & 0x7ff;
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
- gaddr & 0xffffffff80000000UL))
+ gaddr & _REGION3_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
- table += (gaddr >> 20) & 0x7ff;
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
/* Walk the parent mm page table */
mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
@@ -771,7 +771,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = gmap->table;
switch (gmap->asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
- table += (gaddr >> 53) & 0x7ff;
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if (level == 4)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -779,7 +779,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_REGION2:
- table += (gaddr >> 42) & 0x7ff;
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if (level == 3)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -787,7 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_REGION3:
- table += (gaddr >> 31) & 0x7ff;
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if (level == 2)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -795,13 +795,13 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_SEGMENT:
- table += (gaddr >> 20) & 0x7ff;
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (level == 1)
break;
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
- table += (gaddr >> 12) & 0xff;
+ table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
}
return table;
}
@@ -1126,7 +1126,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
if (!table || *table & _PAGE_INVALID)
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1);
+ gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}
@@ -1144,7 +1144,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
int i;
BUG_ON(!gmap_is_shadow(sg));
- for (i = 0; i < 256; i++, raddr += 1UL << 12)
+ for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
pgt[i] = _PAGE_INVALID;
}
@@ -1164,8 +1164,8 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1);
- sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
+ sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
*ste = _SEGMENT_ENTRY_EMPTY;
@@ -1193,7 +1193,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
- for (i = 0; i < 2048; i++, raddr += 1UL << 20) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
continue;
pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
@@ -1222,8 +1222,8 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1);
- r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
+ r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
*r3e = _REGION3_ENTRY_EMPTY;
@@ -1231,7 +1231,7 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
/* Free segment table */
page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1251,7 +1251,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
- for (i = 0; i < 2048; i++, raddr += 1UL << 31) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
continue;
sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
@@ -1260,7 +1260,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
/* Free segment table */
page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1280,8 +1280,8 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1);
- r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
+ r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
*r2e = _REGION2_ENTRY_EMPTY;
@@ -1289,7 +1289,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
/* Free region 3 table */
page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1309,7 +1309,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
- for (i = 0; i < 2048; i++, raddr += 1UL << 42) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
continue;
r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
@@ -1318,7 +1318,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
/* Free region 3 table */
page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1338,8 +1338,8 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1);
- r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
+ r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
*r1e = _REGION1_ENTRY_EMPTY;
@@ -1347,7 +1347,7 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
/* Free region 2 table */
page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1367,7 +1367,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
- for (i = 0; i < 2048; i++, raddr += 1UL << 53) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
continue;
r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
@@ -1378,7 +1378,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
/* Free region 2 table */
page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1535,7 +1535,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
/* protect after insertion, so it will get properly invalidated */
down_read(&parent->mm->mmap_sem);
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
- ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096,
+ ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
PROT_READ, PGSTE_VSIE_BIT);
up_read(&parent->mm->mmap_sem);
spin_lock(&parent->shadow_lock);
@@ -1578,7 +1578,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
BUG_ON(!gmap_is_shadow(sg));
/* Allocate a shadow region second table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = r2t & _REGION_ENTRY_ORIGIN;
@@ -1614,10 +1614,10 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
}
spin_unlock(&sg->guest_table_lock);
/* Make r2t read-only in parent gmap page table */
- raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1;
+ raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
origin = r2t & _REGION_ENTRY_ORIGIN;
- offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1634,7 +1634,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
@@ -1662,7 +1662,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
BUG_ON(!gmap_is_shadow(sg));
/* Allocate a shadow region second table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = r3t & _REGION_ENTRY_ORIGIN;
@@ -1697,10 +1697,10 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
}
spin_unlock(&sg->guest_table_lock);
/* Make r3t read-only in parent gmap page table */
- raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2;
+ raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
origin = r3t & _REGION_ENTRY_ORIGIN;
- offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1717,7 +1717,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
@@ -1745,7 +1745,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
/* Allocate a shadow segment table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = sgt & _REGION_ENTRY_ORIGIN;
@@ -1781,10 +1781,10 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
}
spin_unlock(&sg->guest_table_lock);
/* Make sgt read-only in parent gmap page table */
- raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3;
+ raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
origin = sgt & _REGION_ENTRY_ORIGIN;
- offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1801,7 +1801,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
@@ -1902,7 +1902,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
}
spin_unlock(&sg->guest_table_lock);
/* Make pgt read-only in parent gmap page table (not the pgste) */
- raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT;
+ raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
spin_lock(&sg->guest_table_lock);
@@ -2021,7 +2021,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
}
/* Check for top level table */
start = sg->orig_asce & _ASCE_ORIGIN;
- end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096;
+ end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
gaddr < end) {
/* The complete shadow table has to go */
@@ -2032,7 +2032,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
return;
}
/* Remove the page table tree from on specific entry */
- head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12);
+ head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
gmap_for_each_rmap_safe(rmap, rnext, head) {
bits = rmap->raddr & _SHADOW_RMAP_MASK;
raddr = rmap->raddr ^ bits;
@@ -2076,7 +2076,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
struct gmap *gmap, *sg, *next;
offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- offset = offset * (4096 / sizeof(pte_t));
+ offset = offset * (PAGE_SIZE / sizeof(pte_t));
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
@@ -2121,6 +2121,37 @@ static inline void thp_split_mm(struct mm_struct *mm)
}
/*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ * future creation of zero pages
+ * - This must be called after THP was enabled
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long addr;
+
+ for (addr = start; addr != end; addr += PAGE_SIZE) {
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (is_zero_pfn(pte_pfn(*ptep)))
+ ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_unmap_unlock(ptep, ptl);
+ }
+ return 0;
+}
+
+static inline void zap_zero_pages(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+}
+
+/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2137,6 +2168,7 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
+ zap_zero_pages(mm);
up_write(&mm->mmap_sem);
return 0;
}
@@ -2149,13 +2181,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- /*
- * Remove all zero page mappings,
- * after establishing a policy to forbid zero page mappings
- * following faults for that page will get fresh anonymous pages
- */
- if (is_zero_pfn(pte_pfn(*pte)))
- ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
/* Clear storage key */
ptep_zap_key(walk->mm, addr, pte);
return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8111694ce55a..3b567838b905 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -26,6 +26,7 @@
#include <linux/poison.h>
#include <linux/initrd.h>
#include <linux/export.h>
+#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <asm/processor.h>
@@ -84,7 +85,7 @@ void __init paging_init(void)
psw_t psw;
init_mm.pgd = swapper_pg_dir;
- if (VMALLOC_END > (1UL << 42)) {
+ if (VMALLOC_END > _REGION2_SIZE) {
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION2_ENTRY_EMPTY;
} else {
@@ -93,8 +94,7 @@ void __init paging_init(void)
}
init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
S390_lowcore.kernel_asce = init_mm.context.asce;
- clear_table((unsigned long *) init_mm.pgd, pgd_type,
- sizeof(unsigned long)*2048);
+ crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
vmem_map_init();
/* enable virtual mapping in kernel mode */
@@ -137,6 +137,8 @@ void __init mem_init(void)
free_all_bootmem();
setup_zero_pages(); /* Setup zeroed pages. */
+ cmma_init_nodat();
+
mem_init_print_info(NULL);
}
@@ -166,6 +168,58 @@ unsigned long memory_block_size_bytes(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
+
+#ifdef CONFIG_CMA
+
+/* Prevent memory blocks which contain cma regions from going offline */
+
+struct s390_cma_mem_data {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int s390_cma_check_range(struct cma *cma, void *data)
+{
+ struct s390_cma_mem_data *mem_data;
+ unsigned long start, end;
+
+ mem_data = data;
+ start = cma_get_base(cma);
+ end = start + cma_get_size(cma);
+ if (end < mem_data->start)
+ return 0;
+ if (start >= mem_data->end)
+ return 0;
+ return -EBUSY;
+}
+
+static int s390_cma_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct s390_cma_mem_data mem_data;
+ struct memory_notify *arg;
+ int rc = 0;
+
+ arg = data;
+ mem_data.start = arg->start_pfn << PAGE_SHIFT;
+ mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
+ if (action == MEM_GOING_OFFLINE)
+ rc = cma_for_each_area(s390_cma_check_range, &mem_data);
+ return notifier_from_errno(rc);
+}
+
+static struct notifier_block s390_cma_mem_nb = {
+ .notifier_call = s390_cma_mem_notifier,
+};
+
+static int __init s390_cma_mem_init(void)
+{
+ return register_memory_notifier(&s390_cma_mem_nb);
+}
+device_initcall(s390_cma_mem_init);
+
+#endif /* CONFIG_CMA */
+
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
{
unsigned long start_pfn = PFN_DOWN(start);
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 69a7b01ae746..07fa7b8ae233 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -10,9 +10,10 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/init.h>
-
+#include <asm/facility.h>
#include <asm/page-states.h>
static int cmma_flag = 1;
@@ -36,14 +37,16 @@ __setup("cmma=", cmma);
static inline int cmma_test_essa(void)
{
register unsigned long tmp asm("0") = 0;
- register int rc asm("1") = -EOPNOTSUPP;
+ register int rc asm("1");
+ /* test ESSA_GET_STATE */
asm volatile(
- " .insn rrf,0xb9ab0000,%1,%1,0,0\n"
+ " .insn rrf,0xb9ab0000,%1,%1,%2,0\n"
"0: la %0,0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+&d" (rc), "+&d" (tmp));
+ : "=&d" (rc), "+&d" (tmp)
+ : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP));
return rc;
}
@@ -51,11 +54,26 @@ void __init cmma_init(void)
{
if (!cmma_flag)
return;
- if (cmma_test_essa())
+ if (cmma_test_essa()) {
cmma_flag = 0;
+ return;
+ }
+ if (test_facility(147))
+ cmma_flag = 2;
}
-static inline void set_page_unstable(struct page *page, int order)
+static inline unsigned char get_page_state(struct page *page)
+{
+ unsigned char state;
+
+ asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (state)
+ : "a" (page_to_phys(page)),
+ "i" (ESSA_GET_STATE));
+ return state & 0x3f;
+}
+
+static inline void set_page_unused(struct page *page, int order)
{
int i, rc;
@@ -66,14 +84,18 @@ static inline void set_page_unstable(struct page *page, int order)
"i" (ESSA_SET_UNUSED));
}
-void arch_free_page(struct page *page, int order)
+static inline void set_page_stable_dat(struct page *page, int order)
{
- if (!cmma_flag)
- return;
- set_page_unstable(page, order);
+ int i, rc;
+
+ for (i = 0; i < (1 << order); i++)
+ asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (rc)
+ : "a" (page_to_phys(page + i)),
+ "i" (ESSA_SET_STABLE));
}
-static inline void set_page_stable(struct page *page, int order)
+static inline void set_page_stable_nodat(struct page *page, int order)
{
int i, rc;
@@ -81,14 +103,154 @@ static inline void set_page_stable(struct page *page, int order)
asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
: "=&d" (rc)
: "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE));
+ "i" (ESSA_SET_STABLE_NODAT));
+}
+
+static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*pmd) || pmd_large(*pmd))
+ continue;
+ page = virt_to_page(pmd_val(*pmd));
+ set_bit(PG_arch_1, &page->flags);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pud_t *pud;
+ int i;
+
+ pud = pud_offset(p4d, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*pud) || pud_large(*pud))
+ continue;
+ if (!pud_folded(*pud)) {
+ page = virt_to_page(pud_val(*pud));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pmd(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ p4d_t *p4d;
+ int i;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(*p4d))
+ continue;
+ if (!p4d_folded(*p4d)) {
+ page = virt_to_page(p4d_val(*p4d));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pud(p4d, addr, next);
+ } while (p4d++, addr = next, addr != end);
+}
+
+static void mark_kernel_pgd(void)
+{
+ unsigned long addr, next;
+ struct page *page;
+ pgd_t *pgd;
+ int i;
+
+ addr = 0;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, MODULES_END);
+ if (pgd_none(*pgd))
+ continue;
+ if (!pgd_folded(*pgd)) {
+ page = virt_to_page(pgd_val(*pgd));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_p4d(pgd, addr, next);
+ } while (pgd++, addr = next, addr != MODULES_END);
+}
+
+void __init cmma_init_nodat(void)
+{
+ struct memblock_region *reg;
+ struct page *page;
+ unsigned long start, end, ix;
+
+ if (cmma_flag < 2)
+ return;
+ /* Mark pages used in kernel page tables */
+ mark_kernel_pgd();
+
+ /* Set all kernel pages not used for page tables to stable/no-dat */
+ for_each_memblock(memory, reg) {
+ start = memblock_region_memory_base_pfn(reg);
+ end = memblock_region_memory_end_pfn(reg);
+ page = pfn_to_page(start);
+ for (ix = start; ix < end; ix++, page++) {
+ if (__test_and_clear_bit(PG_arch_1, &page->flags))
+ continue; /* skip page table pages */
+ if (!list_empty(&page->lru))
+ continue; /* skip free pages */
+ set_page_stable_nodat(page, 0);
+ }
+ }
+}
+
+void arch_free_page(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_unused(page, order);
}
void arch_alloc_page(struct page *page, int order)
{
if (!cmma_flag)
return;
- set_page_stable(page, order);
+ if (cmma_flag < 2)
+ set_page_stable_dat(page, order);
+ else
+ set_page_stable_nodat(page, order);
+}
+
+void arch_set_page_dat(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_stable_dat(page, order);
+}
+
+void arch_set_page_nodat(struct page *page, int order)
+{
+ if (cmma_flag < 2)
+ return;
+ set_page_stable_nodat(page, order);
+}
+
+int arch_test_page_nodat(struct page *page)
+{
+ unsigned char state;
+
+ if (cmma_flag < 2)
+ return 0;
+ state = get_page_state(page);
+ return !!(state & 0x20);
}
void arch_set_page_states(int make_stable)
@@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable)
list_for_each(l, &zone->free_area[order].free_list[t]) {
page = list_entry(l, struct page, lru);
if (make_stable)
- set_page_stable(page, order);
+ set_page_stable_dat(page, 0);
else
- set_page_unstable(page, order);
+ set_page_unused(page, order);
}
}
spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 180481589246..552f898dfa74 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,6 +7,7 @@
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/set_memory.h>
@@ -191,7 +192,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
pud_t new;
int i, ro, nx;
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
return -ENOMEM;
pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
@@ -328,7 +329,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
return;
}
for (i = 0; i < nr; i++) {
- __ptep_ipte(address, pte, IPTE_GLOBAL);
+ __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
address += PAGE_SIZE;
pte++;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 18918e394ce4..c5b74dd61197 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
if (!page)
return NULL;
+ arch_set_page_dat(page, 2);
return (unsigned long *) page_to_phys(page);
}
@@ -82,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
int rc, notify;
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
- BUG_ON(mm->context.asce_limit < (1UL << 42));
+ BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
if (end >= TASK_SIZE_MAX)
return -ENOMEM;
rc = 0;
@@ -95,11 +96,11 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
}
spin_lock_bh(&mm->page_table_lock);
pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit == (1UL << 42)) {
+ if (mm->context.asce_limit == _REGION2_SIZE) {
crst_table_init(table, _REGION2_ENTRY_EMPTY);
p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_limit = _REGION1_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
} else {
@@ -123,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm)
pgd_t *pgd;
/* downgrade should only happen from 3 to 2 levels (compat only) */
- BUG_ON(mm->context.asce_limit != (1UL << 42));
+ BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
if (current->active_mm == mm) {
clear_user_asce();
@@ -132,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm)
pgd = mm->pgd;
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
crst_table_free(mm, (unsigned long *) pgd);
@@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
__free_page(page);
return NULL;
}
+ arch_set_page_dat(page, 0);
/* Initialize page table */
table = (unsigned long *) page_to_phys(page);
if (mm_alloc_pgste(mm)) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4a1f7366b17a..4198a71b8fdd 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -25,8 +25,49 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
+ }
+}
+
+static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ }
+}
+
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ int nodat)
{
pte_t old;
@@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_ipte(addr, ptep, IPTE_LOCAL);
+ ptep_ipte_local(mm, addr, ptep, nodat);
else
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ ptep_ipte_global(mm, addr, ptep, nodat);
atomic_dec(&mm->context.flush_count);
return old;
}
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ int nodat)
{
pte_t old;
@@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
pte_val(*ptep) |= _PAGE_INVALID;
mm->context.flush_mm = 1;
} else
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ ptep_ipte_global(mm, addr, ptep, nodat);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_direct(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_direct(mm, addr, ptep, nodat);
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
preempt_enable();
return old;
@@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_lazy(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
preempt_enable();
return old;
@@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_lazy(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(old, pgste, mm);
pgste_set(ptep, pgste);
@@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
+static inline void pmdp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+}
+
+static inline void pmdp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+}
+
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
@@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
old = *pmdp;
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old;
- if (!MACHINE_HAS_IDTE) {
- __pmdp_csp(pmdp);
- return old;
- }
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pmdp_idte(addr, pmdp, IDTE_LOCAL);
+ pmdp_idte_local(mm, addr, pmdp);
else
- __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
+ pmdp_idte_global(mm, addr, pmdp);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
- } else if (MACHINE_HAS_IDTE)
- __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
- else
- __pmdp_csp(pmdp);
+ } else {
+ pmdp_idte_global(mm, addr, pmdp);
+ }
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -359,28 +424,46 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(pmdp_xchg_lazy);
-static inline pud_t pudp_flush_direct(struct mm_struct *mm,
- unsigned long addr, pud_t *pudp)
+static inline void pudp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
{
- pud_t old;
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
+}
- old = *pudp;
- if (pud_val(old) & _REGION_ENTRY_INVALID)
- return old;
- if (!MACHINE_HAS_IDTE) {
+static inline void pudp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
+ else
/*
* Invalid bit position is the same for pmd and pud, so we can
* re-use _pmd_csp() here
*/
__pmdp_csp((pmd_t *) pudp);
+}
+
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ pud_t old;
+
+ old = *pudp;
+ if (pud_val(old) & _REGION_ENTRY_INVALID)
return old;
- }
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pudp_idte(addr, pudp, IDTE_LOCAL);
+ pudp_idte_local(mm, addr, pudp);
else
- __pudp_idte(addr, pudp, IDTE_GLOBAL);
+ pudp_idte_global(mm, addr, pudp);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
{
pte_t entry;
pgste_t pgste;
- int pte_i, pte_p;
+ int pte_i, pte_p, nodat;
pgste = pgste_get_lock(ptep);
entry = *ptep;
@@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
return -EAGAIN;
}
/* Change access rights and set pgste bit */
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
if (prot == PROT_NONE && !pte_i) {
- ptep_flush_direct(mm, addr, ptep);
+ ptep_flush_direct(mm, addr, ptep, nodat);
pgste = pgste_update_all(entry, pgste, mm);
pte_val(entry) |= _PAGE_INVALID;
}
if (prot == PROT_READ && !pte_p) {
- ptep_flush_direct(mm, addr, ptep);
+ ptep_flush_direct(mm, addr, ptep, nodat);
pte_val(entry) &= ~_PAGE_INVALID;
pte_val(entry) |= _PAGE_PROTECT;
}
@@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
pgste_t pgste;
+ int nodat;
pgste = pgste_get_lock(ptep);
/* notifier is called by the caller */
- ptep_flush_direct(mm, saddr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_flush_direct(mm, saddr, ptep, nodat);
/* don't touch the storage key - it belongs to parent pgste */
pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
pgste_set_unlock(ptep, pgste);
@@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte_t *ptep;
pte_t pte;
bool dirty;
+ int nodat;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
@@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_ipte_global(mm, addr, ptep, nodat);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
else
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index d8398962a723..c0af0d7b6e5f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,37 +38,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return (void *) memblock_alloc(size, size);
}
-static inline p4d_t *vmem_p4d_alloc(void)
+void *vmem_crst_alloc(unsigned long val)
{
- p4d_t *p4d = NULL;
+ unsigned long *table;
- p4d = vmem_alloc_pages(2);
- if (!p4d)
- return NULL;
- clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
- return p4d;
-}
-
-static inline pud_t *vmem_pud_alloc(void)
-{
- pud_t *pud = NULL;
-
- pud = vmem_alloc_pages(2);
- if (!pud)
- return NULL;
- clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
- return pud;
-}
-
-pmd_t *vmem_pmd_alloc(void)
-{
- pmd_t *pmd = NULL;
-
- pmd = vmem_alloc_pages(2);
- if (!pmd)
- return NULL;
- clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
- return pmd;
+ table = vmem_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
}
pte_t __ref *vmem_pte_alloc(void)
@@ -114,14 +91,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
- p4_dir = vmem_p4d_alloc();
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
}
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
- pu_dir = vmem_pud_alloc();
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!pu_dir)
goto out;
p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -136,7 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
continue;
}
if (pud_none(*pu_dir)) {
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
goto out;
pud_populate(&init_mm, pu_dir, pm_dir);
@@ -253,7 +230,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
- p4_dir = vmem_p4d_alloc();
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
@@ -261,7 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
- pu_dir = vmem_pud_alloc();
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!pu_dir)
goto out;
p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -269,7 +246,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
goto out;
pud_populate(&init_mm, pu_dir, pm_dir);