From 38a6f4266989c4dae68eccb1a5cb4580a48003e4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 15:21:35 -0400 Subject: arch/tile: complete migration to new kmap_atomic scheme This change makes KM_TYPE_NR independent of the actual deprecated list of km_type values, which are no longer used in tile code anywhere. For now we leave it set to 8, allowing that many nested mappings, and thus reserving 32MB of address space. A few remaining places using KM_* values were cleaned up as well. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/highmem.h | 1 - arch/tile/include/asm/kmap_types.h | 34 ++++++++++++++++++++++++---------- arch/tile/include/asm/pgtable.h | 6 ++---- arch/tile/kernel/machine_kexec.c | 6 +++--- arch/tile/lib/memcpy_tile64.c | 11 ++++++++--- arch/tile/mm/highmem.c | 2 +- arch/tile/mm/pgtable.c | 4 ++-- 7 files changed, 40 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h index e0f7ee186721..b2a6c5de79ab 100644 --- a/arch/tile/include/asm/highmem.h +++ b/arch/tile/include/asm/highmem.h @@ -23,7 +23,6 @@ #include #include -#include #include #include diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h index 1480106d1c05..3d0f20246260 100644 --- a/arch/tile/include/asm/kmap_types.h +++ b/arch/tile/include/asm/kmap_types.h @@ -16,28 +16,42 @@ #define _ASM_TILE_KMAP_TYPES_H /* - * In TILE Linux each set of four of these uses another 16MB chunk of - * address space, given 64 tiles and 64KB pages, so we only enable - * ones that are required by the kernel configuration. + * In 32-bit TILE Linux we have to balance the desire to have a lot of + * nested atomic mappings with the fact that large page sizes and many + * processors chew up address space quickly. In a typical + * 64-processor, 64KB-page layout build, making KM_TYPE_NR one larger + * adds 4MB of required address-space. For now we leave KM_TYPE_NR + * set to depth 8. */ enum km_type { + KM_TYPE_NR = 8 +}; + +/* + * We provide dummy definitions of all the stray values that used to be + * required for kmap_atomic() and no longer are. + */ +enum { KM_BOUNCE_READ, KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, KM_BIO_SRC_IRQ, + KM_BIO_DST_IRQ, + KM_PTE0, + KM_PTE1, KM_IRQ0, KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, - KM_MEMCPY0, - KM_MEMCPY1, -#if defined(CONFIG_HIGHPTE) - KM_PTE0, - KM_PTE1, -#endif - KM_TYPE_NR + KM_SYNC_ICACHE, + KM_SYNC_DCACHE, + KM_UML_USERCOPY, + KM_IRQ_PTE, + KM_NMI, + KM_NMI_PTE, + KM_KDB }; #endif /* _ASM_TILE_KMAP_TYPES_H */ diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index dc4ccdd855bc..a6604e9485da 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -344,10 +344,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pgd_offset_k(address) pgd_offset(&init_mm, address) #if defined(CONFIG_HIGHPTE) -extern pte_t *_pte_offset_map(pmd_t *, unsigned long address, enum km_type); -#define pte_offset_map(dir, address) \ - _pte_offset_map(dir, address, KM_PTE0) -#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) +extern pte_t *pte_offset_map(pmd_t *, unsigned long address); +#define pte_unmap(pte) kunmap_atomic(pte) #else #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) #define pte_unmap(pte) do { } while (0) diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index ba7a265d6179..0d8b9e933487 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -182,13 +182,13 @@ static void kexec_find_and_set_command_line(struct kimage *image) if ((entry & IND_SOURCE)) { void *va = - kmap_atomic_pfn(entry >> PAGE_SHIFT, KM_USER0); + kmap_atomic_pfn(entry >> PAGE_SHIFT); r = kexec_bn2cl(va); if (r) { command_line = r; break; } - kunmap_atomic(va, KM_USER0); + kunmap_atomic(va); } } @@ -198,7 +198,7 @@ static void kexec_find_and_set_command_line(struct kimage *image) hverr = hv_set_command_line( (HV_VirtAddr) command_line, strlen(command_line)); - kunmap_atomic(command_line, KM_USER0); + kunmap_atomic(command_line); } else { pr_info("%s: no command line found; making empty\n", __func__); diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index dfedea7b266b..f7d4a6ad61e8 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c @@ -54,7 +54,7 @@ typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); * we must run with interrupts disabled to avoid the risk of some * other code seeing the incoherent data in our cache. (Recall that * our cache is indexed by PA, so even if the other code doesn't use - * our KM_MEMCPY virtual addresses, they'll still hit in cache using + * our kmap_atomic virtual addresses, they'll still hit in cache using * the normal VAs that aren't supposed to hit in cache.) */ static void memcpy_multicache(void *dest, const void *source, @@ -64,6 +64,7 @@ static void memcpy_multicache(void *dest, const void *source, unsigned long flags, newsrc, newdst; pmd_t *pmdp; pte_t *ptep; + int type0, type1; int cpu = get_cpu(); /* @@ -77,7 +78,8 @@ static void memcpy_multicache(void *dest, const void *source, sim_allow_multiple_caching(1); /* Set up the new dest mapping */ - idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + KM_MEMCPY0; + type0 = kmap_atomic_idx_push(); + idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); ptep = pte_offset_kernel(pmdp, newdst); @@ -87,7 +89,8 @@ static void memcpy_multicache(void *dest, const void *source, } /* Set up the new source mapping */ - idx += (KM_MEMCPY0 - KM_MEMCPY1); + type1 = kmap_atomic_idx_push(); + idx += (type0 - type1); src_pte = hv_pte_set_nc(src_pte); src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); @@ -119,6 +122,8 @@ static void memcpy_multicache(void *dest, const void *source, * We're done: notify the simulator that all is back to normal, * and re-enable interrupts and pre-emption. */ + kmap_atomic_idx_pop(); + kmap_atomic_idx_pop(); sim_allow_multiple_caching(0); local_irq_restore(flags); put_cpu(); diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index abb57331cf6e..31dbbd9afe47 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -227,7 +227,7 @@ EXPORT_SYMBOL(kmap_atomic_prot); void *__kmap_atomic(struct page *page) { /* PAGE_NONE is a magic value that tells us to check immutability. */ - return kmap_atomic_prot(page, type, PAGE_NONE); + return kmap_atomic_prot(page, PAGE_NONE); } EXPORT_SYMBOL(__kmap_atomic); diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 335c24621c41..1f5430c53d0d 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -134,9 +134,9 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) } #if defined(CONFIG_HIGHPTE) -pte_t *_pte_offset_map(pmd_t *dir, unsigned long address, enum km_type type) +pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) { - pte_t *pte = kmap_atomic(pmd_page(*dir), type) + + pte_t *pte = kmap_atomic(pmd_page(*dir)) + (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK; return &pte[pte_index(address)]; } -- cgit v1.2.3-59-g8ed1b From 5d966115de84c22cd4df029cb00be0e51fab6c10 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 15:24:29 -0400 Subject: arch/tile: bomb raw_local_irq_ to arch_local_irq_ This completes the tile migration to the new naming scheme for the architecture-specific irq management code. Signed-off-by: Chris Metcalf --- arch/tile/kernel/early_printk.c | 2 +- arch/tile/kernel/hardwall.c | 4 ++-- arch/tile/kernel/irq.c | 4 ++-- arch/tile/kernel/messaging.c | 2 +- arch/tile/kernel/reboot.c | 6 +++--- arch/tile/kernel/setup.c | 8 ++++---- arch/tile/kernel/smp.c | 2 +- arch/tile/kernel/time.c | 8 ++++---- 8 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c index 2c54fd43a8a0..493a0e66d916 100644 --- a/arch/tile/kernel/early_printk.c +++ b/arch/tile/kernel/early_printk.c @@ -54,7 +54,7 @@ void early_printk(const char *fmt, ...) void early_panic(const char *fmt, ...) { va_list ap; - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); va_start(ap, fmt); early_printk("Kernel panic - not syncing: "); early_vprintk(fmt, ap); diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 1e54a7843410..70b829a15ae5 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -151,12 +151,12 @@ enum direction_protect { static void enable_firewall_interrupts(void) { - raw_local_irq_unmask_now(INT_UDN_FIREWALL); + arch_local_irq_unmask_now(INT_UDN_FIREWALL); } static void disable_firewall_interrupts(void) { - raw_local_irq_mask_now(INT_UDN_FIREWALL); + arch_local_irq_mask_now(INT_UDN_FIREWALL); } /* Set up hardwall on this cpu based on the passed hardwall_info. */ diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index e63917687e99..128805ef8f2c 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -26,7 +26,7 @@ #define IS_HW_CLEARED 1 /* - * The set of interrupts we enable for raw_local_irq_enable(). + * The set of interrupts we enable for arch_local_irq_enable(). * This is initialized to have just a single interrupt that the kernel * doesn't actually use as a sentinel. During kernel init, * interrupts are added as the kernel gets prepared to support them. @@ -225,7 +225,7 @@ void __cpuinit setup_irq_regs(void) /* Enable interrupt delivery. */ unmask_irqs(~0UL); #if CHIP_HAS_IPI() - raw_local_irq_unmask(INT_IPI_K); + arch_local_irq_unmask(INT_IPI_K); #endif } diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 997e3933f726..0858ee6b520f 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -34,7 +34,7 @@ void __cpuinit init_messaging(void) panic("hv_register_message_state: error %d", rc); /* Make sure downcall interrupts will be enabled. */ - raw_local_irq_unmask(INT_INTCTRL_K); + arch_local_irq_unmask(INT_INTCTRL_K); } void hv_message_intr(struct pt_regs *regs, int intnum) diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c index acd86d20beba..baa3d905fee2 100644 --- a/arch/tile/kernel/reboot.c +++ b/arch/tile/kernel/reboot.c @@ -27,7 +27,7 @@ void machine_halt(void) { warn_early_printk(); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_halt(); } @@ -35,14 +35,14 @@ void machine_halt(void) void machine_power_off(void) { warn_early_printk(); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_power_off(); } void machine_restart(char *cmd) { - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_restart((HV_VirtAddr) "vmlinux", (HV_VirtAddr) cmd); } diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index ae51cad12da0..fb0b3cbeae14 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -868,14 +868,14 @@ void __cpuinit setup_cpu(int boot) /* Allow asynchronous TLB interrupts. */ #if CHIP_HAS_TILE_DMA() - raw_local_irq_unmask(INT_DMATLB_MISS); - raw_local_irq_unmask(INT_DMATLB_ACCESS); + arch_local_irq_unmask(INT_DMATLB_MISS); + arch_local_irq_unmask(INT_DMATLB_ACCESS); #endif #if CHIP_HAS_SN_PROC() - raw_local_irq_unmask(INT_SNITLB_MISS); + arch_local_irq_unmask(INT_SNITLB_MISS); #endif #ifdef __tilegx__ - raw_local_irq_unmask(INT_SINGLE_STEP_K); + arch_local_irq_unmask(INT_SINGLE_STEP_K); #endif /* diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 75255d90aff3..9575b37a8b75 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -115,7 +115,7 @@ static void smp_start_cpu_interrupt(void) static void smp_stop_cpu_interrupt(void) { set_cpu_online(smp_processor_id(), 0); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); for (;;) asm("nap"); } diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 6bed820e1421..f2e156e44692 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -132,7 +132,7 @@ static int tile_timer_set_next_event(unsigned long ticks, { BUG_ON(ticks > MAX_TICK); __insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks); - raw_local_irq_unmask_now(INT_TILE_TIMER); + arch_local_irq_unmask_now(INT_TILE_TIMER); return 0; } @@ -143,7 +143,7 @@ static int tile_timer_set_next_event(unsigned long ticks, static void tile_timer_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { - raw_local_irq_mask_now(INT_TILE_TIMER); + arch_local_irq_mask_now(INT_TILE_TIMER); } /* @@ -172,7 +172,7 @@ void __cpuinit setup_tile_timer(void) evt->cpumask = cpumask_of(smp_processor_id()); /* Start out with timer not firing. */ - raw_local_irq_mask_now(INT_TILE_TIMER); + arch_local_irq_mask_now(INT_TILE_TIMER); /* Register tile timer. */ clockevents_register_device(evt); @@ -188,7 +188,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) * Mask the timer interrupt here, since we are a oneshot timer * and there are now by definition no events pending. */ - raw_local_irq_mask(INT_TILE_TIMER); + arch_local_irq_mask(INT_TILE_TIMER); /* Track time spent here in an interrupt context */ irq_enter(); -- cgit v1.2.3-59-g8ed1b From d59e609d6568ba5ab23c256f412ac5ec360722c1 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 15:25:16 -0400 Subject: arch/tile: avoid __must_check warning on one strict_strtol check For the "initfree" boot argument it's not that big a deal, but to avoid warnings in the code, we check for a valid value before allowing the specified argument to override the kernel default. Signed-off-by: Chris Metcalf --- arch/tile/mm/init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 78e1982cb6c9..0b9ce69b0ee5 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -988,8 +988,12 @@ static long __write_once initfree = 1; /* Select whether to free (1) or mark unusable (0) the __init pages. */ static int __init set_initfree(char *str) { - strict_strtol(str, 0, &initfree); - pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); + long val; + if (strict_strtol(str, 0, &val)) { + initfree = val; + pr_info("initfree: %s free init pages\n", + initfree ? "will" : "won't"); + } return 1; } __setup("initfree=", set_initfree); -- cgit v1.2.3-59-g8ed1b From 34a89d26bdc4ba46a406fa3842239e921c493d44 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 28 Oct 2010 15:03:30 -0400 Subject: arch/tile: correct double syscall restart for nested signals This change is modelled on similar fixes for other architectures. The pt_regs "faultnum" member is set to the trap (fault) number that caused us to enter the kernel, and is INT_SWINT_1 for the syscall software interrupt. We already supported a pseudo value, INT_SWINT_1_SIGRETURN, that we used for the rt_sigreturn syscall; it avoided the case where one signal was handled, then we "tail-called" to another handler. This change avoids the similar case where we start to call one handler, then are preempted into another handler when we start trying to run the first handler. We clear ->faultnum after calling handle_signal(), and to be paranoid also in the case where there was no signal to deliver. Signed-off-by: Chris Metcalf --- arch/tile/kernel/signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index fb28e85ae3ae..704ce0bce833 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -330,7 +330,7 @@ void do_signal(struct pt_regs *regs) current_thread_info()->status &= ~TS_RESTORE_SIGMASK; } - return; + goto done; } /* Did we come from a system call? */ @@ -358,4 +358,8 @@ void do_signal(struct pt_regs *regs) current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } + +done: + /* Avoid double syscall restart if there are nested signals. */ + regs->faultnum = INT_SWINT_1_SIGRETURN; } -- cgit v1.2.3-59-g8ed1b From 1deb9c5dfb179819ecdbf80a1d121e26c63caab3 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 28 Oct 2010 15:47:06 -0400 Subject: arch/tile: don't allow user code to set the PL via ptrace or signal return The kernel was allowing any component of the pt_regs to be updated either by signal handlers writing to the stack, or by processes writing via PTRACE_POKEUSR or PTRACE_SETREGS, which meant they could set their PL up from 0 to 1 and get access to kernel code and data (or, in practice, cause a kernel panic). We now always reset the ex1 field, allowing the user to set their ICS bit only. Signed-off-by: Chris Metcalf --- arch/tile/kernel/ptrace.c | 39 +++++++++++++++++++++------------------ arch/tile/kernel/signal.c | 3 +++ 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 9cd29884c09f..e92e40527d6d 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -50,10 +50,10 @@ long arch_ptrace(struct task_struct *child, long request, { unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; - int i; long ret = -EIO; - unsigned long *childregs; char *childreg; + struct pt_regs copyregs; + int ex1_offset; switch (request) { @@ -80,6 +80,16 @@ long arch_ptrace(struct task_struct *child, long request, if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; + + /* Guard against overwrites of the privilege level. */ + ex1_offset = PTREGS_OFFSET_EX1; +#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN) + if (is_compat_task()) /* point at low word */ + ex1_offset += sizeof(compat_long_t); +#endif + if (addr == ex1_offset) + data = PL_ICS_EX1(USER_PL, EX1_ICS(data)); + #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) @@ -96,26 +106,19 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: /* Get all registers from the child. */ - if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) - break; - childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); - ++i) { - ret = __put_user(childregs[i], &datap[i]); - if (ret != 0) - break; + if (copy_to_user(datap, task_pt_regs(child), + sizeof(struct pt_regs)) == 0) { + ret = 0; } break; case PTRACE_SETREGS: /* Set all registers in the child. */ - if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) - break; - childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); - ++i) { - ret = __get_user(childregs[i], &datap[i]); - if (ret != 0) - break; + if (copy_from_user(©regs, datap, + sizeof(struct pt_regs)) == 0) { + copyregs.ex1 = + PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1)); + *task_pt_regs(child) = copyregs; + ret = 0; } break; diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 704ce0bce833..687719d4abd1 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -71,6 +71,9 @@ int restore_sigcontext(struct pt_regs *regs, for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) err |= __get_user(regs->regs[i], &sc->gregs[i]); + /* Ensure that the PL is always set to USER_PL. */ + regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1)); + regs->faultnum = INT_SWINT_1_SIGRETURN; err |= __get_user(*pr0, &sc->gregs[0]); -- cgit v1.2.3-59-g8ed1b From 2c7387ef9969bb073c25ecbdcc5be30770267b16 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 28 Oct 2010 16:07:07 -0400 Subject: asm-generic/stat.h: support 64-bit file time_t for stat() The existing asm-generic/stat.h specifies st_mtime, etc., as a 32-value, and works well for 32-bit architectures (currently microblaze, score, and 32-bit tile). However, for 64-bit architectures it isn't sufficient to return 32 bits of time_t; this isn't good insurance against the 2037 rollover. (It also makes glibc support less convenient, since we can't use glibc's handy STAT_IS_KERNEL_STAT mode.) This change extends the two "timespec" fields for each of the three atime, mtime, and ctime fields from "int" to "long". As a result, on 32-bit platforms nothing changes, and 64-bit platforms will now work as expected. The only wrinkle is 32-bit userspace under 64-bit kernels taking advantage of COMPAT mode. For these, we leave the "struct stat64" definitions with the "int" versions of the time_t and nsec fields, so that architectures can implement compat_sys_stat64() and friends with sys_stat64(), etc., and get the expected 32-bit structure layout. This requires a field-by-field copy in the kernel, implemented by the code guarded under __ARCH_WANT_STAT64. This does mean that the shape of the "struct stat" and "struct stat64" structures is different on a 64-bit kernel, but only one of the two structures should ever be used by any given process: "struct stat" is meant for 64-bit userspace only, and "struct stat64" for 32-bit userspace only. (On a 32-bit kernel the two structures continue to have the same shape, since "long" is 32 bits.) The alternative is keeping the two structures the same shape on 64-bit kernels, which means a 64-bit time_t in "struct stat64" for 32-bit processes. This is a little unnatural since 32-bit userspace can't do anything with 64 bits of time_t information, since time_t is just "long", not "int64_t"; and in any case 32-bit userspace might expect to be running under a 32-bit kernel, which can't provide the high 32 bits anyway. In the case of a 32-bit kernel we'd then be extending the kernel's 32-bit time_t to 64 bits, then truncating it back to 32 bits again in userspace, for no particular reason. And, as mentioned above, if we have 64-bit time_t for 32-bit processes we can't easily use glibc's STAT_IS_KERNEL_STAT, since glibc's stat structure requires an embedded "struct timespec", which is a pair of "long" (32-bit) values in a 32-bit userspace. "Inventive" solutions are possible, but are pretty hacky. Signed-off-by: Chris Metcalf Acked-by: Arnd Bergmann --- arch/tile/include/asm/stat.h | 3 +++ arch/tile/include/asm/unistd.h | 1 + arch/tile/kernel/compat.c | 10 +++++----- include/asm-generic/stat.h | 14 +++++++------- 4 files changed, 16 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h index 3dc90fa92c70..b16e5db8f0e7 100644 --- a/arch/tile/include/asm/stat.h +++ b/arch/tile/include/asm/stat.h @@ -1 +1,4 @@ +#ifdef CONFIG_COMPAT +#define __ARCH_WANT_STAT64 /* Used for compat_sys_stat64() etc. */ +#endif #include diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h index f2e3ff485333..b35c2db71199 100644 --- a/arch/tile/include/asm/unistd.h +++ b/arch/tile/include/asm/unistd.h @@ -41,6 +41,7 @@ __SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr) #ifdef CONFIG_COMPAT #define __ARCH_WANT_SYS_LLSEEK #endif +#define __ARCH_WANT_SYS_NEWFSTATAT #endif #endif /* _ASM_TILE_UNISTD_H */ diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 77739cdd9462..67617a05e602 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -148,11 +148,11 @@ long tile_compat_sys_msgrcv(int msqid, #define compat_sys_readahead sys32_readahead #define compat_sys_sync_file_range compat_sys_sync_file_range2 -/* The native 64-bit "struct stat" matches the 32-bit "struct stat64". */ -#define compat_sys_stat64 sys_newstat -#define compat_sys_lstat64 sys_newlstat -#define compat_sys_fstat64 sys_newfstat -#define compat_sys_fstatat64 sys_newfstatat +/* We leverage the "struct stat64" type for 32-bit time_t/nsec. */ +#define compat_sys_stat64 sys_stat64 +#define compat_sys_lstat64 sys_lstat64 +#define compat_sys_fstat64 sys_fstat64 +#define compat_sys_fstatat64 sys_fstatat64 /* The native sys_ptrace dynamically handles compat binaries. */ #define compat_sys_ptrace sys_ptrace diff --git a/include/asm-generic/stat.h b/include/asm-generic/stat.h index 47e64170305d..bd8cad21998e 100644 --- a/include/asm-generic/stat.h +++ b/include/asm-generic/stat.h @@ -33,18 +33,18 @@ struct stat { int st_blksize; /* Optimal block size for I/O. */ int __pad2; long st_blocks; /* Number 512-byte blocks allocated. */ - int st_atime; /* Time of last access. */ - unsigned int st_atime_nsec; - int st_mtime; /* Time of last modification. */ - unsigned int st_mtime_nsec; - int st_ctime; /* Time of last status change. */ - unsigned int st_ctime_nsec; + long st_atime; /* Time of last access. */ + unsigned long st_atime_nsec; + long st_mtime; /* Time of last modification. */ + unsigned long st_mtime_nsec; + long st_ctime; /* Time of last status change. */ + unsigned long st_ctime_nsec; unsigned int __unused4; unsigned int __unused5; }; -#if __BITS_PER_LONG != 64 /* This matches struct stat64 in glibc2.1. Only used for 32 bit. */ +#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64) struct stat64 { unsigned long long st_dev; /* Device. */ unsigned long long st_ino; /* File serial number. */ -- cgit v1.2.3-59-g8ed1b From d02db4f8d79c5841ba32b326edb75ea6acd081ca Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 12:46:10 -0400 Subject: arch/tile: mark "hardwall" device as non-seekable Arnd's recent patch series tagged this device with noop_llseek, conservatively. In fact, it should be no_llseek, which we arrange for by opening the device with nonseekable_open(). Signed-off-by: Chris Metcalf --- arch/tile/kernel/hardwall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 70b829a15ae5..e910530436e6 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -768,13 +768,13 @@ static int hardwall_release(struct inode *inode, struct file *file) } static const struct file_operations dev_hardwall_fops = { + .open = nonseekable_open, .unlocked_ioctl = hardwall_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = hardwall_compat_ioctl, #endif .flush = hardwall_flush, .release = hardwall_release, - .llseek = noop_llseek, }; static struct cdev hardwall_dev; -- cgit v1.2.3-59-g8ed1b