aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/s390/Kconfig25
-rw-r--r--arch/s390/defconfig2
-rw-r--r--arch/s390/kernel/compat_linux.c6
-rw-r--r--arch/s390/kernel/compat_linux.h31
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/ipl.c4
-rw-r--r--arch/s390/kernel/process.c4
-rw-r--r--arch/s390/kernel/ptrace.c10
-rw-r--r--arch/s390/kernel/setup.c98
-rw-r--r--arch/s390/kernel/signal.c2
-rw-r--r--arch/s390/kernel/smp.c2
-rw-r--r--arch/s390/lib/uaccess_mvcos.c53
-rw-r--r--arch/s390/lib/uaccess_pt.c320
-rw-r--r--arch/s390/mm/fault.c88
-rw-r--r--arch/s390/mm/init.c6
-rw-r--r--arch/s390/mm/vmem.c14
-rw-r--r--include/asm-s390/compat.h28
-rw-r--r--include/asm-s390/lowcore.h6
-rw-r--r--include/asm-s390/mmu_context.h50
-rw-r--r--include/asm-s390/pgalloc.h85
-rw-r--r--include/asm-s390/pgtable.h146
-rw-r--r--include/asm-s390/processor.h6
-rw-r--r--include/asm-s390/ptrace.h11
-rw-r--r--include/asm-s390/setup.h12
-rw-r--r--include/asm-s390/smp.h2
-rw-r--r--include/asm-s390/system.h4
-rw-r--r--include/asm-s390/tlbflush.h9
-rw-r--r--include/asm-s390/uaccess.h2
28 files changed, 913 insertions, 115 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 12272361c018..5c7e981c115b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -134,6 +134,31 @@ config AUDIT_ARCH
bool
default y
+config S390_SWITCH_AMODE
+ bool "Switch kernel/user addressing modes"
+ help
+ This option allows to switch the addressing modes of kernel and user
+ space. The kernel parameter switch_amode=on will enable this feature,
+ default is disabled. Enabling this (via kernel parameter) on machines
+ earlier than IBM System z9-109 EC/BC will reduce system performance.
+
+ Note that this option will also be selected by selecting the execute
+ protection option below. Enabling the execute protection via the
+ noexec kernel parameter will also switch the addressing modes,
+ independent of the switch_amode kernel parameter.
+
+
+config S390_EXEC_PROTECT
+ bool "Data execute protection"
+ select S390_SWITCH_AMODE
+ help
+ This option allows to enable a buffer overflow protection for user
+ space programs and it also selects the addressing mode option above.
+ The kernel parameter noexec=on will enable this feature and also
+ switch the addressing modes, default is disabled. Enabling this (via
+ kernel parameter) on machines earlier than IBM System z9-109 EC/BC
+ will reduce system performance.
+
comment "Code generation options"
choice
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 46bb38515b0d..12eb97f9c1d1 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -108,6 +108,8 @@ CONFIG_DEFAULT_MIGRATION_COST=1000000
CONFIG_COMPAT=y
CONFIG_SYSVIPC_COMPAT=y
CONFIG_AUDIT_ARCH=y
+CONFIG_S390_SWITCH_AMODE=y
+CONFIG_S390_EXEC_PROTECT=y
#
# Code generation options
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index cf84d697daed..666bb6daa148 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -69,6 +69,12 @@
#include "compat_linux.h"
+long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
+ PSW_MASK_PSTATE | PSW_DEFAULT_KEY);
+long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME |
+ PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK |
+ PSW32_MASK_PSTATE);
/* For this source file, we want overflow handling. */
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 1a18e29668ef..e89f8c0c42a0 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -115,37 +115,6 @@ typedef struct
__u32 addr;
} _psw_t32 __attribute__ ((aligned(8)));
-#define PSW32_MASK_PER 0x40000000UL
-#define PSW32_MASK_DAT 0x04000000UL
-#define PSW32_MASK_IO 0x02000000UL
-#define PSW32_MASK_EXT 0x01000000UL
-#define PSW32_MASK_KEY 0x00F00000UL
-#define PSW32_MASK_MCHECK 0x00040000UL
-#define PSW32_MASK_WAIT 0x00020000UL
-#define PSW32_MASK_PSTATE 0x00010000UL
-#define PSW32_MASK_ASC 0x0000C000UL
-#define PSW32_MASK_CC 0x00003000UL
-#define PSW32_MASK_PM 0x00000f00UL
-
-#define PSW32_ADDR_AMODE31 0x80000000UL
-#define PSW32_ADDR_INSN 0x7FFFFFFFUL
-
-#define PSW32_BASE_BITS 0x00080000UL
-
-#define PSW32_ASC_PRIMARY 0x00000000UL
-#define PSW32_ASC_ACCREG 0x00004000UL
-#define PSW32_ASC_SECONDARY 0x00008000UL
-#define PSW32_ASC_HOME 0x0000C000UL
-
-#define PSW32_USER_BITS (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | \
- PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | \
- PSW32_MASK_PSTATE)
-
-#define PSW32_MASK_MERGE(CURRENT,NEW) \
- (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \
- ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM)))
-
-
typedef struct
{
_psw_t32 psw;
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 8d17b2ab6f21..887a9881d0d0 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -298,7 +298,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
_s390_regs_common32 regs32;
int err, i;
- regs32.psw.mask = PSW32_MASK_MERGE(PSW32_USER_BITS,
+ regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits,
(__u32)(regs->psw.mask >> 32));
regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr;
for (i = 0; i < NUM_GPRS; i++)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 9e9972e8a52b..2c91226e1d40 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1016,12 +1016,12 @@ void s390_reset_system(void)
__ctl_clear_bit(0,28);
/* Set new machine check handler */
- S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK;
+ S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
S390_lowcore.mcck_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) &reset_mcck_handler;
/* Set new program check handler */
- S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
S390_lowcore.program_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) &reset_pgm_handler;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 6603fbb41d07..5acfac654f9d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -144,7 +144,7 @@ static void default_idle(void)
trace_hardirqs_on();
/* Wait for external, I/O or machine check interrupt. */
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT |
+ __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
PSW_MASK_IO | PSW_MASK_EXT);
}
@@ -190,7 +190,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
struct pt_regs regs;
memset(&regs, 0, sizeof(regs));
- regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
+ regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
regs.gprs[9] = (unsigned long) fn;
regs.gprs[10] = (unsigned long) arg;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 29fde70090fe..2a8f0872ea8b 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -230,9 +230,9 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
*/
if (addr == (addr_t) &dummy->regs.psw.mask &&
#ifdef CONFIG_COMPAT
- data != PSW_MASK_MERGE(PSW_USER32_BITS, data) &&
+ data != PSW_MASK_MERGE(psw_user32_bits, data) &&
#endif
- data != PSW_MASK_MERGE(PSW_USER_BITS, data))
+ data != PSW_MASK_MERGE(psw_user_bits, data))
/* Invalid psw mask. */
return -EINVAL;
#ifndef CONFIG_64BIT
@@ -393,7 +393,7 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
if (addr == (addr_t) &dummy32->regs.psw.mask) {
/* Fake a 31 bit psw mask. */
tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32);
- tmp = PSW32_MASK_MERGE(PSW32_USER_BITS, tmp);
+ tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp);
} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
/* Fake a 31 bit psw address. */
tmp = (__u32) task_pt_regs(child)->psw.addr |
@@ -468,11 +468,11 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
*/
if (addr == (addr_t) &dummy32->regs.psw.mask) {
/* Build a 64 bit psw mask from 31 bit mask. */
- if (tmp != PSW32_MASK_MERGE(PSW32_USER_BITS, tmp))
+ if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp))
/* Invalid psw mask. */
return -EINVAL;
task_pt_regs(child)->psw.mask =
- PSW_MASK_MERGE(PSW_USER32_BITS, (__u64) tmp << 32);
+ PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32);
} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
/* Build a 64 bit psw address from 31 bit address. */
task_pt_regs(child)->psw.addr =
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 25bf7277d311..b1b9a931237d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -50,6 +50,13 @@
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
+#include <asm/compat.h>
+
+long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
+ PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
+long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
+ PSW_MASK_PSTATE | PSW_DEFAULT_KEY);
/*
* User copy operations.
@@ -383,6 +390,84 @@ static int __init early_parse_ipldelay(char *p)
}
early_param("ipldelay", early_parse_ipldelay);
+#ifdef CONFIG_S390_SWITCH_AMODE
+unsigned int switch_amode = 0;
+EXPORT_SYMBOL_GPL(switch_amode);
+
+static inline void set_amode_and_uaccess(unsigned long user_amode,
+ unsigned long user32_amode)
+{
+ psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
+ PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
+#ifdef CONFIG_COMPAT
+ psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
+ PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
+ psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode |
+ PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK |
+ PSW32_MASK_PSTATE;
+#endif
+ psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
+ PSW_MASK_MCHECK | PSW_DEFAULT_KEY;
+
+ if (MACHINE_HAS_MVCOS) {
+ printk("mvcos available.\n");
+ memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess));
+ } else {
+ printk("mvcos not available.\n");
+ memcpy(&uaccess, &uaccess_pt, sizeof(uaccess));
+ }
+}
+
+/*
+ * Switch kernel/user addressing modes?
+ */
+static int __init early_parse_switch_amode(char *p)
+{
+ switch_amode = 1;
+ return 0;
+}
+early_param("switch_amode", early_parse_switch_amode);
+
+#else /* CONFIG_S390_SWITCH_AMODE */
+static inline void set_amode_and_uaccess(unsigned long user_amode,
+ unsigned long user32_amode)
+{
+}
+#endif /* CONFIG_S390_SWITCH_AMODE */
+
+#ifdef CONFIG_S390_EXEC_PROTECT
+unsigned int s390_noexec = 0;
+EXPORT_SYMBOL_GPL(s390_noexec);
+
+/*
+ * Enable execute protection?
+ */
+static int __init early_parse_noexec(char *p)
+{
+ if (!strncmp(p, "off", 3))
+ return 0;
+ switch_amode = 1;
+ s390_noexec = 1;
+ return 0;
+}
+early_param("noexec", early_parse_noexec);
+#endif /* CONFIG_S390_EXEC_PROTECT */
+
+static void setup_addressing_mode(void)
+{
+ if (s390_noexec) {
+ printk("S390 execute protection active, ");
+ set_amode_and_uaccess(PSW_ASC_SECONDARY, PSW32_ASC_SECONDARY);
+ return;
+ }
+ if (switch_amode) {
+ printk("S390 address spaces switched, ");
+ set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY);
+ }
+}
+
static void __init
setup_lowcore(void)
{
@@ -399,19 +484,21 @@ setup_lowcore(void)
lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
lc->restart_psw.addr =
PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = PSW_KERNEL_BITS;
+ if (switch_amode)
+ lc->restart_psw.mask |= PSW_ASC_HOME;
+ lc->external_new_psw.mask = psw_kernel_bits;
lc->external_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
+ lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
- lc->program_new_psw.mask = PSW_KERNEL_BITS;
+ lc->program_new_psw.mask = psw_kernel_bits;
lc->program_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long)pgm_check_handler;
lc->mcck_new_psw.mask =
- PSW_KERNEL_BITS & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT;
+ psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT;
lc->mcck_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = PSW_KERNEL_BITS;
+ lc->io_new_psw.mask = psw_kernel_bits;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
lc->ipl_device = S390_lowcore.ipl_device;
lc->jiffy_timer = -1LL;
@@ -645,6 +732,7 @@ setup_arch(char **cmdline_p)
parse_early_param();
setup_memory_end();
+ setup_addressing_mode();
setup_memory();
setup_resources();
setup_lowcore();
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 4c8a7954ef48..554f9cf7499c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -119,7 +119,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
/* Copy a 'clean' PSW mask to the user to avoid leaking
information about whether PER is currently on. */
- user_sregs.regs.psw.mask = PSW_MASK_MERGE(PSW_USER_BITS, regs->psw.mask);
+ user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask);
user_sregs.regs.psw.addr = regs->psw.addr;
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3cb7e1032072..cb155d9fd749 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -244,7 +244,7 @@ static inline void do_wait_for_stop(void)
void smp_send_stop(void)
{
/* Disable all interrupts/machine checks */
- __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK);
+ __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
/* write magic number to zero page (absolute 0) */
lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC;
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
index 78c48f88f5f7..6d8772339d76 100644
--- a/arch/s390/lib/uaccess_mvcos.c
+++ b/arch/s390/lib/uaccess_mvcos.c
@@ -162,6 +162,44 @@ static size_t clear_user_mvcos(size_t size, void __user *to)
return size;
}
+static size_t strnlen_user_mvcos(size_t count, const char __user *src)
+{
+ char buf[256];
+ int rc;
+ size_t done, len, len_str;
+
+ done = 0;
+ do {
+ len = min(count - done, (size_t) 256);
+ rc = uaccess.copy_from_user(len, src + done, buf);
+ if (unlikely(rc == len))
+ return 0;
+ len -= rc;
+ len_str = strnlen(buf, len);
+ done += len_str;
+ } while ((len_str == len) && (done < count));
+ return done + 1;
+}
+
+static size_t strncpy_from_user_mvcos(size_t count, const char __user *src,
+ char *dst)
+{
+ int rc;
+ size_t done, len, len_str;
+
+ done = 0;
+ do {
+ len = min(count - done, (size_t) 4096);
+ rc = uaccess.copy_from_user(len, src + done, dst);
+ if (unlikely(rc == len))
+ return -EFAULT;
+ len -= rc;
+ len_str = strnlen(dst, len);
+ done += len_str;
+ } while ((len_str == len) && (done < count));
+ return done;
+}
+
struct uaccess_ops uaccess_mvcos = {
.copy_from_user = copy_from_user_mvcos_check,
.copy_from_user_small = copy_from_user_std,
@@ -174,3 +212,18 @@ struct uaccess_ops uaccess_mvcos = {
.futex_atomic_op = futex_atomic_op_std,
.futex_atomic_cmpxchg = futex_atomic_cmpxchg_std,
};
+
+#ifdef CONFIG_S390_SWITCH_AMODE
+struct uaccess_ops uaccess_mvcos_switch = {
+ .copy_from_user = copy_from_user_mvcos,
+ .copy_from_user_small = copy_from_user_mvcos,
+ .copy_to_user = copy_to_user_mvcos,
+ .copy_to_user_small = copy_to_user_mvcos,
+ .copy_in_user = copy_in_user_mvcos,
+ .clear_user = clear_user_mvcos,
+ .strnlen_user = strnlen_user_mvcos,
+ .strncpy_from_user = strncpy_from_user_mvcos,
+ .futex_atomic_op = futex_atomic_op_pt,
+ .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
+};
+#endif
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 24ead559c7bb..637192fa7c9a 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -1,7 +1,8 @@
/*
* arch/s390/lib/uaccess_pt.c
*
- * User access functions based on page table walks.
+ * User access functions based on page table walks for enhanced
+ * system layout without hardware support.
*
* Copyright IBM Corp. 2006
* Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
@@ -134,6 +135,49 @@ fault:
goto retry;
}
+/*
+ * Do DAT for user address by page table walk, return kernel address.
+ * This function needs to be called with current->mm->page_table_lock held.
+ */
+static inline unsigned long __dat_user_addr(unsigned long uaddr)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long pfn, ret;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ int rc;
+
+ ret = 0;
+retry:
+ pgd = pgd_offset(mm, uaddr);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ goto fault;
+
+ pmd = pmd_offset(pgd, uaddr);
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ goto fault;
+
+ pte = pte_offset_map(pmd, uaddr);
+ if (!pte || !pte_present(*pte))
+ goto fault;
+
+ pfn = pte_pfn(*pte);
+ if (!pfn_valid(pfn))
+ goto out;
+
+ ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
+out:
+ return ret;
+fault:
+ spin_unlock(&mm->page_table_lock);
+ rc = __handle_fault(mm, uaddr, 0);
+ spin_lock(&mm->page_table_lock);
+ if (rc)
+ goto out;
+ goto retry;
+}
+
size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
{
size_t rc;
@@ -156,3 +200,277 @@ size_t copy_to_user_pt(size_t n, void __user *to, const void *from)
}
return __user_copy_pt((unsigned long) to, (void *) from, n, 1);
}
+
+static size_t clear_user_pt(size_t n, void __user *to)
+{
+ long done, size, ret;
+
+ if (segment_eq(get_fs(), KERNEL_DS)) {
+ memset((void __kernel __force *) to, 0, n);
+ return 0;
+ }
+ done = 0;
+ do {
+ if (n - done > PAGE_SIZE)
+ size = PAGE_SIZE;
+ else
+ size = n - done;
+ ret = __user_copy_pt((unsigned long) to + done,
+ &empty_zero_page, size, 1);
+ done += size;
+ if (ret)
+ return ret + n - done;
+ } while (done < n);
+ return 0;
+}
+
+static size_t strnlen_user_pt(size_t count, const char __user *src)
+{
+ char *addr;
+ unsigned long uaddr = (unsigned long) src;
+ struct mm_struct *mm = current->mm;
+ unsigned long offset, pfn, done, len;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ size_t len_str;
+
+ if (segment_eq(get_fs(), KERNEL_DS))
+ return strnlen((const char __kernel __force *) src, count) + 1;
+ done = 0;
+retry:
+ spin_lock(&mm->page_table_lock);
+ do {
+ pgd = pgd_offset(mm, uaddr);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ goto fault;
+
+ pmd = pmd_offset(pgd, uaddr);
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ goto fault;
+
+ pte = pte_offset_map(pmd, uaddr);
+ if (!pte || !pte_present(*pte))
+ goto fault;
+
+ pfn = pte_pfn(*pte);
+ if (!pfn_valid(pfn)) {
+ done = -1;
+ goto out;
+ }
+
+ offset = uaddr & (PAGE_SIZE-1);
+ addr = (char *)(pfn << PAGE_SHIFT) + offset;
+ len = min(count - done, PAGE_SIZE - offset);
+ len_str = strnlen(addr, len);
+ done += len_str;
+ uaddr += len_str;
+ } while ((len_str == len) && (done < count));
+out:
+ spin_unlock(&mm->page_table_lock);
+ return done + 1;
+fault:
+ spin_unlock(&mm->page_table_lock);
+ if (__handle_fault(mm, uaddr, 0)) {
+ return 0;
+ }
+ goto retry;
+}
+
+static size_t strncpy_from_user_pt(size_t count, const char __user *src,
+ char *dst)
+{
+ size_t n = strnlen_user_pt(count, src);
+
+ if (!n)
+ return -EFAULT;
+ if (n > count)
+ n = count;
+ if (segment_eq(get_fs(), KERNEL_DS)) {
+ memcpy(dst, (const char __kernel __force *) src, n);
+ if (dst[n-1] == '\0')
+ return n-1;
+ else
+ return n;
+ }
+ if (__user_copy_pt((unsigned long) src, dst, n, 0))
+ return -EFAULT;
+ if (dst[n-1] == '\0')
+ return n-1;
+ else
+ return n;
+}
+
+static size_t copy_in_user_pt(size_t n, void __user *to,
+ const void __user *from)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
+ uaddr, done, size;
+ unsigned long uaddr_from = (unsigned long) from;
+ unsigned long uaddr_to = (unsigned long) to;
+ pgd_t *pgd_from, *pgd_to;
+ pmd_t *pmd_from, *pmd_to;
+ pte_t *pte_from, *pte_to;
+ int write_user;
+
+ done = 0;
+retry:
+ spin_lock(&mm->page_table_lock);
+ do {
+ pgd_from = pgd_offset(mm, uaddr_from);
+ if (pgd_none(*pgd_from) || unlikely(pgd_bad(*pgd_from))) {
+ uaddr = uaddr_from;
+ write_user = 0;
+ goto fault;
+ }
+ pgd_to = pgd_offset(mm, uaddr_to);
+ if (pgd_none(*pgd_to) || unlikely(pgd_bad(*pgd_to))) {
+ uaddr = uaddr_to;
+ write_user = 1;
+ goto fault;
+ }
+
+ pmd_from = pmd_offset(pgd_from, uaddr_from);
+ if (pmd_none(*pmd_from) || unlikely(pmd_bad(*pmd_from))) {
+ uaddr = uaddr_from;
+ write_user = 0;
+ goto fault;
+ }
+ pmd_to = pmd_offset(pgd_to, uaddr_to);
+ if (pmd_none(*pmd_to) || unlikely(pmd_bad(*pmd_to))) {
+ uaddr = uaddr_to;
+ write_user = 1;
+ goto fault;
+ }
+
+ pte_from = pte_offset_map(pmd_from, uaddr_from);
+ if (!pte_from || !pte_present(*pte_from)) {
+ uaddr = uaddr_from;
+ write_user = 0;
+ goto fault;
+ }
+ pte_to = pte_offset_map(pmd_to, uaddr_to);
+ if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) {
+ uaddr = uaddr_to;
+ write_user = 1;
+ goto fault;
+ }
+
+ pfn_from = pte_pfn(*pte_from);
+ if (!pfn_valid(pfn_from))
+ goto out;
+ pfn_to = pte_pfn(*pte_to);
+ if (!pfn_valid(pfn_to))
+ goto out;
+
+ offset_from = uaddr_from & (PAGE_SIZE-1);
+ offset_to = uaddr_from & (PAGE_SIZE-1);
+ offset_max = max(offset_from, offset_to);
+ size = min(n - done, PAGE_SIZE - offset_max);
+
+ memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
+ (void *)(pfn_from << PAGE_SHIFT) + offset_from, size);
+ done += size;
+ uaddr_from += size;
+ uaddr_to += size;
+ } while (done < n);
+out:
+ spin_unlock(&mm->page_table_lock);
+ return n - done;
+fault:
+ spin_unlock(&mm->page_table_lock);
+ if (__handle_fault(mm, uaddr, write_user))
+ return n - done;
+ goto retry;
+}
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
+ asm volatile("0: l %1,0(%6)\n" \
+ "1: " insn \
+ "2: cs %1,%2,0(%6)\n" \
+ "3: jl 1b\n" \
+ " lhi %0,0\n" \
+ "4:\n" \
+ EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
+ "=m" (*uaddr) \
+ : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
+ "m" (*uaddr) : "cc" );
+
+int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+{
+ int oldval = 0, newval, ret;
+
+ spin_lock(&current->mm->page_table_lock);
+ uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+ if (!uaddr) {
+ spin_unlock(&current->mm->page_table_lock);
+ return -EFAULT;
+ }
+ get_page(virt_to_page(uaddr));
+ spin_unlock(&current->mm->page_table_lock);
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op("lr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("lr %2,%1\nar %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("lr %2,%1\nor %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+ put_page(virt_to_page(uaddr));
+ *old = oldval;
+ return ret;
+}
+
+int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+{
+ int ret;
+
+ spin_lock(&current->mm->page_table_lock);
+ uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+ if (!uaddr) {
+ spin_unlock(&current->mm->page_table_lock);
+ return -EFAULT;
+ }
+ get_page(virt_to_page(uaddr));
+ spin_unlock(&current->mm->page_table_lock);
+ asm volatile(" cs %1,%4,0(%5)\n"
+ "0: lr %0,%1\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+ : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ : "cc", "memory" );
+ put_page(virt_to_page(uaddr));
+ return ret;
+}
+
+struct uaccess_ops uaccess_pt = {
+ .copy_from_user = copy_from_user_pt,
+ .copy_from_user_small = copy_from_user_pt,
+ .copy_to_user = copy_to_user_pt,
+ .copy_to_user_small = copy_to_user_pt,
+ .copy_in_user = copy_in_user_pt,
+ .clear_user = clear_user_pt,
+ .strnlen_user = strnlen_user_pt,
+ .strncpy_from_user = strncpy_from_user_pt,
+ .futex_atomic_op = futex_atomic_op_pt,
+ .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
+};
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3382e29f34a4..9ff143e87746 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -137,7 +137,9 @@ static int __check_access_register(struct pt_regs *regs, int error_code)
/*
* Check which address space the address belongs to.
- * Returns 1 for user space and 0 for kernel space.
+ * May return 1 or 2 for user space and 0 for kernel space.
+ * Returns 2 for user space in primary addressing mode with
+ * CONFIG_S390_EXEC_PROTECT on and kernel parameter noexec=on.
*/
static inline int check_user_space(struct pt_regs *regs, int error_code)
{
@@ -154,7 +156,7 @@ static inline int check_user_space(struct pt_regs *regs, int error_code)
return __check_access_register(regs, error_code);
if (descriptor == 2)
return current->thread.mm_segment.ar4;
- return descriptor != 0;
+ return ((descriptor != 0) ^ (switch_amode)) << s390_noexec;
}
/*
@@ -183,6 +185,77 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
force_sig_info(SIGSEGV, &si, current);
}
+#ifdef CONFIG_S390_EXEC_PROTECT
+extern long sys_sigreturn(struct pt_regs *regs);
+extern long sys_rt_sigreturn(struct pt_regs *regs);
+extern long sys32_sigreturn(struct pt_regs *regs);
+extern long sys32_rt_sigreturn(struct pt_regs *regs);
+
+static inline void do_sigreturn(struct mm_struct *mm, struct pt_regs *regs,
+ int rt)
+{
+ up_read(&mm->mmap_sem);
+ clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(current, TIF_31BIT)) {
+ if (rt)
+ sys32_rt_sigreturn(regs);
+ else
+ sys32_sigreturn(regs);
+ return;
+ }
+#endif /* CONFIG_COMPAT */
+ if (rt)
+ sys_rt_sigreturn(regs);
+ else
+ sys_sigreturn(regs);
+ return;
+}
+
+static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
+ unsigned long address, unsigned long error_code)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ u16 *instruction;
+ unsigned long pfn, uaddr = regs->psw.addr;
+
+ spin_lock(&mm->page_table_lock);
+ pgd = pgd_offset(mm, uaddr);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ goto out_fault;
+ pmd = pmd_offset(pgd, uaddr);
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ goto out_fault;
+ pte = pte_offset_map(pmd_offset(pgd_offset(mm, uaddr), uaddr), uaddr);
+ if (!pte || !pte_present(*pte))
+ goto out_fault;
+ pfn = pte_pfn(*pte);
+ if (!pfn_valid(pfn))
+ goto out_fault;
+ spin_unlock(&mm->page_table_lock);
+
+ instruction = (u16 *) ((pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE-1)));
+ if (*instruction == 0x0a77)
+ do_sigreturn(mm, regs, 0);
+ else if (*instruction == 0x0aad)
+ do_sigreturn(mm, regs, 1);
+ else {
+ printk("- XXX - do_exception: task = %s, primary, NO EXEC "
+ "-> SIGSEGV\n", current->comm);
+ up_read(&mm->mmap_sem);
+ current->thread.prot_addr = address;
+ current->thread.trap_no = error_code;
+ do_sigsegv(regs, error_code, SEGV_MAPERR, address);
+ }
+ return 0;
+out_fault:
+ spin_unlock(&mm->page_table_lock);
+ return -EFAULT;
+}
+#endif /* CONFIG_S390_EXEC_PROTECT */
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -260,6 +333,17 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
+
+#ifdef CONFIG_S390_EXEC_PROTECT
+ if (unlikely((user_address == 2) && !(vma->vm_flags & VM_EXEC)))
+ if (!signal_return(mm, regs, address, error_code))
+ /*
+ * signal_return() has done an up_read(&mm->mmap_sem)
+ * if it returns 0.
+ */
+ return;
+#endif
+
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 0e7e9acab9e1..162a338a5575 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -104,7 +104,7 @@ static void __init setup_ro_region(void)
pmd = pmd_offset(pgd, address);
pte = pte_offset_kernel(pmd, address);
new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
- set_pte(pte, new_pte);
+ *pte = new_pte;
}
}
@@ -124,11 +124,11 @@ void __init paging_init(void)
#ifdef CONFIG_64BIT
pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE;
for (i = 0; i < PTRS_PER_PGD; i++)
- pgd_clear(pg_dir + i);
+ pgd_clear_kernel(pg_dir + i);
#else
pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
for (i = 0; i < PTRS_PER_PGD; i++)
- pmd_clear((pmd_t *)(pg_dir + i));
+ pmd_clear_kernel((pmd_t *)(pg_dir + i));
#endif
vmem_map_init();
setup_ro_region();
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index cd3d93e8c211..92a565190028 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -82,7 +82,7 @@ static inline pmd_t *vmem_pmd_alloc(void)
if (!pmd)
return NULL;
for (i = 0; i < PTRS_PER_PMD; i++)
- pmd_clear(pmd + i);
+ pmd_clear_kernel(pmd + i);
return pmd;
}
@@ -97,7 +97,7 @@ static inline pte_t *vmem_pte_alloc(void)
return NULL;
pte_val(empty_pte) = _PAGE_TYPE_EMPTY;
for (i = 0; i < PTRS_PER_PTE; i++)
- set_pte(pte + i, empty_pte);
+ pte[i] = empty_pte;
return pte;
}
@@ -119,7 +119,7 @@ static int vmem_add_range(unsigned long start, unsigned long size)
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
goto out;
- pgd_populate(&init_mm, pg_dir, pm_dir);
+ pgd_populate_kernel(&init_mm, pg_dir, pm_dir);
}
pm_dir = pmd_offset(pg_dir, address);
@@ -132,7 +132,7 @@ static int vmem_add_range(unsigned long start, unsigned long size)
pt_dir = pte_offset_kernel(pm_dir, address);
pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
- set_pte(pt_dir, pte);
+ *pt_dir = pte;
}
ret = 0;
out:
@@ -161,7 +161,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
if (pmd_none(*pm_dir))
continue;
pt_dir = pte_offset_kernel(pm_dir, address);
- set_pte(pt_dir, pte);
+ *pt_dir = pte;
}
flush_tlb_kernel_range(start, start + size);
}
@@ -191,7 +191,7 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size)
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
goto out;
- pgd_populate(&init_mm, pg_dir, pm_dir);
+ pgd_populate_kernel(&init_mm, pg_dir, pm_dir);
}
pm_dir = pmd_offset(pg_dir, address);
@@ -210,7 +210,7 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size)
if (!new_page)
goto out;
pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
- set_pte(pt_dir, pte);
+ *pt_dir = pte;
}
}
ret = 0;
diff --git a/include/asm-s390/compat.h b/include/asm-s390/compat.h
index 356a0b183539..296f4f1a20e1 100644
--- a/include/asm-s390/compat.h
+++ b/include/asm-s390/compat.h
@@ -6,6 +6,34 @@
#include <linux/types.h>
#include <linux/sched.h>
+#define PSW32_MASK_PER 0x40000000UL
+#define PSW32_MASK_DAT 0x04000000UL
+#define PSW32_MASK_IO 0x02000000UL
+#define PSW32_MASK_EXT 0x01000000UL
+#define PSW32_MASK_KEY 0x00F00000UL
+#define PSW32_MASK_MCHECK 0x00040000UL
+#define PSW32_MASK_WAIT 0x00020000UL
+#define PSW32_MASK_PSTATE 0x00010000UL
+#define PSW32_MASK_ASC 0x0000C000UL
+#define PSW32_MASK_CC 0x00003000UL
+#define PSW32_MASK_PM 0x00000f00UL
+
+#define PSW32_ADDR_AMODE31 0x80000000UL
+#define PSW32_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW32_BASE_BITS 0x00080000UL
+
+#define PSW32_ASC_PRIMARY 0x00000000UL
+#define PSW32_ASC_ACCREG 0x00004000UL
+#define PSW32_ASC_SECONDARY 0x00008000UL
+#define PSW32_ASC_HOME 0x0000C000UL
+
+#define PSW32_MASK_MERGE(CURRENT,NEW) \
+ (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \
+ ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM)))
+
+extern long psw32_user_bits;
+
#define COMPAT_USER_HZ 100
typedef u32 compat_size_t;
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 74f7389bd3ee..4a31d0a7ee83 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -220,7 +220,8 @@ struct _lowcore
__u32 kernel_asce; /* 0xc4c */
__u32 user_asce; /* 0xc50 */
__u32 panic_stack; /* 0xc54 */
- __u8 pad10[0xc60-0xc58]; /* 0xc58 */
+ __u32 user_exec_asce; /* 0xc58 */
+ __u8 pad10[0xc60-0xc5c]; /* 0xc5c */
/* entry.S sensitive area start */
struct cpuinfo_S390 cpu_data; /* 0xc60 */
__u32 ipl_device; /* 0xc7c */
@@ -310,7 +311,8 @@ struct _lowcore
__u64 kernel_asce; /* 0xd58 */
__u64 user_asce; /* 0xd60 */
__u64 panic_stack; /* 0xd68 */
- __u8 pad10[0xd80-0xd70]; /* 0xd70 */
+ __u64 user_exec_asce; /* 0xd70 */
+ __u8 pad10[0xd80-0xd78]; /* 0xd78 */
/* entry.S sensitive area start */
struct cpuinfo_S390 cpu_data; /* 0xd80 */
__u32 ipl_device; /* 0xdb8 */
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h
index bcf24a873874..1d21da220d49 100644
--- a/include/asm-s390/mmu_context.h
+++ b/include/asm-s390/mmu_context.h
@@ -9,6 +9,7 @@
#ifndef __S390_MMU_CONTEXT_H
#define __S390_MMU_CONTEXT_H
+#include <asm/pgalloc.h>
/*
* get a new mmu context.. S390 don't know about contexts.
*/
@@ -16,29 +17,44 @@
#define destroy_context(mm) do { } while (0)
+#ifndef __s390x__
+#define LCTL_OPCODE "lctl"
+#define PGTABLE_BITS (_SEGMENT_TABLE|USER_STD_MASK)
+#else
+#define LCTL_OPCODE "lctlg"
+#define PGTABLE_BITS (_REGION_TABLE|USER_STD_MASK)
+#endif
+
static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *tsk)
{
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
- struct task_struct *tsk)
+ struct task_struct *tsk)
{
- if (prev != next) {
-#ifndef __s390x__
- S390_lowcore.user_asce = (__pa(next->pgd)&PAGE_MASK) |
- (_SEGMENT_TABLE|USER_STD_MASK);
- /* Load home space page table origin. */
- asm volatile("lctl 13,13,%0"
- : : "m" (S390_lowcore.user_asce) );
-#else /* __s390x__ */
- S390_lowcore.user_asce = (__pa(next->pgd) & PAGE_MASK) |
- (_REGION_TABLE|USER_STD_MASK);
- /* Load home space page table origin. */
- asm volatile("lctlg 13,13,%0"
- : : "m" (S390_lowcore.user_asce) );
-#endif /* __s390x__ */
- }
+ pgd_t *shadow_pgd = get_shadow_pgd(next->pgd);
+
+ if (prev != next) {
+ S390_lowcore.user_asce = (__pa(next->pgd) & PAGE_MASK) |
+ PGTABLE_BITS;
+ if (shadow_pgd) {
+ /* Load primary/secondary space page table origin. */
+ S390_lowcore.user_exec_asce =
+ (__pa(shadow_pgd) & PAGE_MASK) | PGTABLE_BITS;
+ asm volatile(LCTL_OPCODE" 1,1,%0\n"
+ LCTL_OPCODE" 7,7,%1"
+ : : "m" (S390_lowcore.user_exec_asce),
+ "m" (S390_lowcore.user_asce) );
+ } else if (switch_amode) {
+ /* Load primary space page table origin. */
+ asm volatile(LCTL_OPCODE" 1,1,%0"
+ : : "m" (S390_lowcore.user_asce) );
+ } else
+ /* Load home space page table origin. */
+ asm volatile(LCTL_OPCODE" 13,13,%0"
+ : : "m" (S390_lowcore.user_asce) );
+ }
cpu_set(smp_processor_id(), next->cpu_vm_mask);
}
@@ -51,4 +67,4 @@ static inline void activate_mm(struct mm_struct *prev,
set_fs(current->thread.mm_segment);
}
-#endif
+#endif /* __S390_MMU_CONTEXT_H */
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 0707a7e2fc16..56c8a6c80e2e 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -47,6 +47,17 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (!pgd)
return NULL;
+ if (s390_noexec) {
+ pgd_t *shadow_pgd = (pgd_t *)
+ __get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
+ struct page *page = virt_to_page(pgd);
+
+ if (!shadow_pgd) {
+ free_pages((unsigned long) pgd, PGD_ALLOC_ORDER);
+ return NULL;
+ }
+ page->lru.next = (void *) shadow_pgd;
+ }
for (i = 0; i < PTRS_PER_PGD; i++)
#ifndef __s390x__
pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
@@ -58,6 +69,10 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
static inline void pgd_free(pgd_t *pgd)
{
+ pgd_t *shadow_pgd = get_shadow_pgd(pgd);
+
+ if (shadow_pgd)
+ free_pages((unsigned long) shadow_pgd, PGD_ALLOC_ORDER);
free_pages((unsigned long) pgd, PGD_ALLOC_ORDER);
}
@@ -71,6 +86,7 @@ static inline void pgd_free(pgd_t *pgd)
#define pmd_free(x) do { } while (0)
#define __pmd_free_tlb(tlb,x) do { } while (0)
#define pgd_populate(mm, pmd, pte) BUG()
+#define pgd_populate_kernel(mm, pmd, pte) BUG()
#else /* __s390x__ */
static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
{
@@ -79,6 +95,17 @@ static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
if (!pmd)
return NULL;
+ if (s390_noexec) {
+ pmd_t *shadow_pmd = (pmd_t *)
+ __get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
+ struct page *page = virt_to_page(pmd);
+
+ if (!shadow_pmd) {
+ free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
+ return NULL;
+ }
+ page->lru.next = (void *) shadow_pmd;
+ }
for (i=0; i < PTRS_PER_PMD; i++)
pmd_clear(pmd + i);
return pmd;
@@ -86,6 +113,10 @@ static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
static inline void pmd_free (pmd_t *pmd)
{
+ pmd_t *shadow_pmd = get_shadow_pmd(pmd);
+
+ if (shadow_pmd)
+ free_pages((unsigned long) shadow_pmd, PMD_ALLOC_ORDER);
free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
}
@@ -95,11 +126,22 @@ static inline void pmd_free (pmd_t *pmd)
pmd_free(pmd); \
} while (0)
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void
+pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
{
pgd_val(*pgd) = _PGD_ENTRY | __pa(pmd);
}
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+{
+ pgd_t *shadow_pgd = get_shadow_pgd(pgd);
+ pmd_t *shadow_pmd = get_shadow_pmd(pmd);
+
+ if (shadow_pgd && shadow_pmd)
+ pgd_populate_kernel(mm, shadow_pgd, shadow_pmd);
+ pgd_populate_kernel(mm, pgd, pmd);
+}
+
#endif /* __s390x__ */
static inline void
@@ -119,7 +161,13 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
{
- pmd_populate_kernel(mm, pmd, (pte_t *)page_to_phys(page));
+ pte_t *pte = (pte_t *)page_to_phys(page);
+ pmd_t *shadow_pmd = get_shadow_pmd(pmd);
+ pte_t *shadow_pte = get_shadow_pte(pte);
+
+ pmd_populate_kernel(mm, pmd, pte);
+ if (shadow_pmd && shadow_pte)
+ pmd_populate_kernel(mm, shadow_pmd, shadow_pte);
}
/*
@@ -133,6 +181,17 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
if (!pte)
return NULL;
+ if (s390_noexec) {
+ pte_t *shadow_pte = (pte_t *)
+ __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ struct page *page = virt_to_page(pte);
+
+ if (!shadow_pte) {
+ free_page((unsigned long) pte);
+ return NULL;
+ }
+ page->lru.next = (void *) shadow_pte;
+ }
for (i=0; i < PTRS_PER_PTE; i++) {
pte_clear(mm, vmaddr, pte + i);
vmaddr += PAGE_SIZE;
@@ -151,14 +210,30 @@ pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
static inline void pte_free_kernel(pte_t *pte)
{
- free_page((unsigned long) pte);
+ pte_t *shadow_pte = get_shadow_pte(pte);
+
+ if (shadow_pte)
+ free_page((unsigned long) shadow_pte);
+ free_page((unsigned long) pte);
}
static inline void pte_free(struct page *pte)
{
- __free_page(pte);
+ struct page *shadow_page = get_shadow_page(pte);
+
+ if (shadow_page)
+ __free_page(shadow_page);
+ __free_page(pte);
}
-#define __pte_free_tlb(tlb,pte) tlb_remove_page(tlb,pte)
+#define __pte_free_tlb(tlb, pte) \
+({ \
+ struct mmu_gather *__tlb = (tlb); \
+ struct page *__pte = (pte); \
+ struct page *shadow_page = get_shadow_page(__pte); \
+ if (shadow_page) \
+ tlb_remove_page(__tlb, shadow_page); \
+ tlb_remove_page(__tlb, __pte); \
+})
#endif /* _S390_PGALLOC_H */
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 304ee7736413..13c16546eff5 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -224,6 +224,8 @@ extern unsigned long vmalloc_end;
#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */
#define _PAGE_TYPE_RO 0x200
#define _PAGE_TYPE_RW 0x000
+#define _PAGE_TYPE_EX_RO 0x202
+#define _PAGE_TYPE_EX_RW 0x002
/*
* PTE type bits are rather complicated. handle_pte_fault uses pte_present,
@@ -244,11 +246,13 @@ extern unsigned long vmalloc_end;
* _PAGE_TYPE_FILE 11?1 -> 11?1
* _PAGE_TYPE_RO 0100 -> 1100
* _PAGE_TYPE_RW 0000 -> 1000
+ * _PAGE_TYPE_EX_RO 0110 -> 1110
+ * _PAGE_TYPE_EX_RW 0010 -> 1010
*
- * pte_none is true for bits combinations 1000, 1100
+ * pte_none is true for bits combinations 1000, 1010, 1100, 1110
* pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
* pte_file is true for bits combinations 1101, 1111
- * swap pte is 1011 and 0001, 0011, 0101, 0111, 1010 and 1110 are invalid.
+ * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
*/
#ifndef __s390x__
@@ -313,33 +317,100 @@ extern unsigned long vmalloc_end;
#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE)
#define PAGE_RO __pgprot(_PAGE_TYPE_RO)
#define PAGE_RW __pgprot(_PAGE_TYPE_RW)
+#define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO)
+#define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW)
#define PAGE_KERNEL PAGE_RW
#define PAGE_COPY PAGE_RO
/*
- * The S390 can't do page protection for execute, and considers that the
- * same are read. Also, write permissions imply read permissions. This is
- * the closest we can get..
+ * Dependent on the EXEC_PROTECT option s390 can do execute protection.
+ * Write permission always implies read permission. In theory with a
+ * primary/secondary page table execute only can be implemented but
+ * it would cost an additional bit in the pte to distinguish all the
+ * different pte types. To avoid that execute permission currently
+ * implies read permission as well.
*/
/*xwr*/
#define __P000 PAGE_NONE
#define __P001 PAGE_RO
#define __P010 PAGE_RO
#define __P011 PAGE_RO
-#define __P100 PAGE_RO
-#define __P101 PAGE_RO
-#define __P110 PAGE_RO
-#define __P111 PAGE_RO
+#define __P100 PAGE_EX_RO
+#define __P101 PAGE_EX_RO
+#define __P110 PAGE_EX_RO
+#define __P111 PAGE_EX_RO
#define __S000 PAGE_NONE
#define __S001 PAGE_RO
#define __S010 PAGE_RW
#define __S011 PAGE_RW
-#define __S100 PAGE_RO
-#define __S101 PAGE_RO
-#define __S110 PAGE_RW
-#define __S111 PAGE_RW
+#define __S100 PAGE_EX_RO
+#define __S101 PAGE_EX_RO
+#define __S110 PAGE_EX_RW
+#define __S111 PAGE_EX_RW
+
+#ifndef __s390x__
+# define PMD_SHADOW_SHIFT 1
+# define PGD_SHADOW_SHIFT 1
+#else /* __s390x__ */
+# define PMD_SHADOW_SHIFT 2
+# define PGD_SHADOW_SHIFT 2
+#endif /* __s390x__ */
+
+static inline struct page *get_shadow_page(struct page *page)
+{
+ if (s390_noexec && !list_empty(&page->lru))
+ return virt_to_page(page->lru.next);
+ return NULL;
+}
+
+static inline pte_t *get_shadow_pte(pte_t *ptep)
+{
+ unsigned long pteptr = (unsigned long) (ptep);
+
+ if (s390_noexec) {
+ unsigned long offset = pteptr & (PAGE_SIZE - 1);
+ void *addr = (void *) (pteptr ^ offset);
+ struct page *page = virt_to_page(addr);
+ if (!list_empty(&page->lru))
+ return (pte_t *) ((unsigned long) page->lru.next |
+ offset);
+ }
+ return NULL;
+}
+
+static inline pmd_t *get_shadow_pmd(pmd_t *pmdp)
+{
+ unsigned long pmdptr = (unsigned long) (pmdp);
+
+ if (s390_noexec) {
+ unsigned long offset = pmdptr &
+ ((PAGE_SIZE << PMD_SHADOW_SHIFT) - 1);
+ void *addr = (void *) (pmdptr ^ offset);
+ struct page *page = virt_to_page(addr);
+ if (!list_empty(&page->lru))
+ return (pmd_t *) ((unsigned long) page->lru.next |
+ offset);
+ }
+ return NULL;
+}
+
+static inline pgd_t *get_shadow_pgd(pgd_t *pgdp)
+{
+ unsigned long pgdptr = (unsigned long) (pgdp);
+
+ if (s390_noexec) {
+ unsigned long offset = pgdptr &
+ ((PAGE_SIZE << PGD_SHADOW_SHIFT) - 1);
+ void *addr = (void *) (pgdptr ^ offset);
+ struct page *page = virt_to_page(addr);
+ if (!list_empty(&page->lru))
+ return (pgd_t *) ((unsigned long) page->lru.next |
+ offset);
+ }
+ return NULL;
+}
/*
* Certain architectures need to do special things when PTEs
@@ -348,7 +419,16 @@ extern unsigned long vmalloc_end;
*/
static inline void set_pte(pte_t *pteptr, pte_t pteval)
{
+ pte_t *shadow_pte = get_shadow_pte(pteptr);
+
*pteptr = pteval;
+ if (shadow_pte) {
+ if (!(pte_val(pteval) & _PAGE_INVALID) &&
+ (pte_val(pteval) & _PAGE_SWX))
+ pte_val(*shadow_pte) = pte_val(pteval) | _PAGE_RO;
+ else
+ pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY;
+ }
}
#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
@@ -466,7 +546,7 @@ static inline int pte_read(pte_t pte)
static inline void pgd_clear(pgd_t * pgdp) { }
-static inline void pmd_clear(pmd_t * pmdp)
+static inline void pmd_clear_kernel(pmd_t * pmdp)
{
pmd_val(pmdp[0]) = _PAGE_TABLE_INV;
pmd_val(pmdp[1]) = _PAGE_TABLE_INV;
@@ -474,24 +554,55 @@ static inline void pmd_clear(pmd_t * pmdp)
pmd_val(pmdp[3]) = _PAGE_TABLE_INV;
}
+static inline void pmd_clear(pmd_t * pmdp)
+{
+ pmd_t *shadow_pmd = get_shadow_pmd(pmdp);
+
+ pmd_clear_kernel(pmdp);
+ if (shadow_pmd)
+ pmd_clear_kernel(shadow_pmd);
+}
+
#else /* __s390x__ */
-static inline void pgd_clear(pgd_t * pgdp)
+static inline void pgd_clear_kernel(pgd_t * pgdp)
{
pgd_val(*pgdp) = _PGD_ENTRY_INV | _PGD_ENTRY;
}
-static inline void pmd_clear(pmd_t * pmdp)
+static inline void pgd_clear(pgd_t * pgdp)
+{
+ pgd_t *shadow_pgd = get_shadow_pgd(pgdp);
+
+ pgd_clear_kernel(pgdp);
+ if (shadow_pgd)
+ pgd_clear_kernel(shadow_pgd);
+}
+
+static inline void pmd_clear_kernel(pmd_t * pmdp)
{
pmd_val(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
pmd_val1(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
}
+static inline void pmd_clear(pmd_t * pmdp)
+{
+ pmd_t *shadow_pmd = get_shadow_pmd(pmdp);
+
+ pmd_clear_kernel(pmdp);
+ if (shadow_pmd)
+ pmd_clear_kernel(shadow_pmd);
+}
+
#endif /* __s390x__ */
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
+ pte_t *shadow_pte = get_shadow_pte(ptep);
+
pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ if (shadow_pte)
+ pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY;
}
/*
@@ -609,8 +720,11 @@ ptep_clear_flush(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
pte_t pte = *ptep;
+ pte_t *shadow_pte = get_shadow_pte(ptep);
__ptep_ipte(address, ptep);
+ if (shadow_pte)
+ __ptep_ipte(address, shadow_pte);
return pte;
}
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 7a7f50efcbd9..5af853576cbd 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -145,7 +145,7 @@ struct stack_frame {
#define start_thread(regs, new_psw, new_stackp) do { \
set_fs(USER_DS); \
- regs->psw.mask = PSW_USER_BITS; \
+ regs->psw.mask = psw_user_bits; \
regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
regs->gprs[15] = new_stackp ; \
} while (0)
@@ -154,14 +154,14 @@ struct stack_frame {
#define start_thread(regs, new_psw, new_stackp) do { \
set_fs(USER_DS); \
- regs->psw.mask = PSW_USER_BITS; \
+ regs->psw.mask = psw_user_bits; \
regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
} while (0)
#define start_thread31(regs, new_psw, new_stackp) do { \
set_fs(USER_DS); \
- regs->psw.mask = PSW_USER32_BITS; \
+ regs->psw.mask = psw_user32_bits; \
regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
} while (0)
diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h
index 7b768c5c68a8..fa6ca87080e8 100644
--- a/include/asm-s390/ptrace.h
+++ b/include/asm-s390/ptrace.h
@@ -266,17 +266,12 @@ typedef struct
#define PSW_ASC_SECONDARY 0x0000800000000000UL
#define PSW_ASC_HOME 0x0000C00000000000UL
-#define PSW_USER32_BITS (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | \
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | \
- PSW_MASK_PSTATE | PSW_DEFAULT_KEY)
+extern long psw_user32_bits;
#endif /* __s390x__ */
-#define PSW_KERNEL_BITS (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | \
- PSW_MASK_MCHECK | PSW_DEFAULT_KEY)
-#define PSW_USER_BITS (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | \
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | \
- PSW_MASK_PSTATE | PSW_DEFAULT_KEY)
+extern long psw_kernel_bits;
+extern long psw_user_bits;
/* This macro merges a NEW PSW mask specified by the user into
the currently active PSW mask CURRENT, modifying only those
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index 542769736fc5..6b68ddda39a7 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -42,6 +42,18 @@ struct mem_chunk {
extern struct mem_chunk memory_chunk[];
+#ifdef CONFIG_S390_SWITCH_AMODE
+extern unsigned int switch_amode;
+#else
+#define switch_amode (0)
+#endif
+
+#ifdef CONFIG_S390_EXEC_PROTECT
+extern unsigned int s390_noexec;
+#else
+#define s390_noexec (0)
+#endif
+
/*
* Machine features detected in head.S
*/
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 2d9e15367c07..b957e4cda464 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -110,7 +110,7 @@ smp_call_function_on(void (*func) (void *info), void *info,
static inline void smp_send_stop(void)
{
/* Disable all interrupts/machine checks */
- __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK);
+ __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
}
#define smp_cpu_not_running(cpu) 1
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index bd0b05ae87d2..bbe137c3ed69 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -373,8 +373,8 @@ __set_psw_mask(unsigned long mask)
__load_psw_mask(mask | (__raw_local_irq_stosm(0x00) & ~(-1UL >> 8)));
}
-#define local_mcck_enable() __set_psw_mask(PSW_KERNEL_BITS)
-#define local_mcck_disable() __set_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK)
+#define local_mcck_enable() __set_psw_mask(psw_kernel_bits)
+#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK)
#ifdef CONFIG_SMP
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index fa4dc916a9bf..66793f55c8b2 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -3,6 +3,7 @@
#include <linux/mm.h>
#include <asm/processor.h>
+#include <asm/pgalloc.h>
/*
* TLB flushing:
@@ -102,6 +103,14 @@ static inline void __flush_tlb_mm(struct mm_struct * mm)
if (unlikely(cpus_empty(mm->cpu_vm_mask)))
return;
if (MACHINE_HAS_IDTE) {
+ pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd);
+
+ if (shadow_pgd) {
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,0"
+ : : "a" (2048),
+ "a" (__pa(shadow_pgd) & PAGE_MASK) : "cc" );
+ }
asm volatile(
" .insn rrf,0xb98e0000,0,%0,%1,0"
: : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc");
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h
index 73ac4e82217b..0235970278f0 100644
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -90,6 +90,8 @@ struct uaccess_ops {
extern struct uaccess_ops uaccess;
extern struct uaccess_ops uaccess_std;
extern struct uaccess_ops uaccess_mvcos;
+extern struct uaccess_ops uaccess_mvcos_switch;
+extern struct uaccess_ops uaccess_pt;
static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
{