From a6fa8e5a6172a5a5bc06ed04f34e50b36c978127 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 30 Jan 2008 13:30:00 +0100 Subject: time: clean hungarian notation from timers Clean up hungarian notation from timer code. Signed-off-by: Pavel Machek Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 78cf899b4409..de0e71359ede 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -5,7 +5,7 @@ #include #include -struct tvec_t_base_s; +struct tvec_base; struct timer_list { struct list_head entry; @@ -14,7 +14,7 @@ struct timer_list { void (*function)(unsigned long); unsigned long data; - struct tvec_t_base_s *base; + struct tvec_base *base; #ifdef CONFIG_TIMER_STATS void *start_site; char start_comm[16]; @@ -22,7 +22,7 @@ struct timer_list { #endif }; -extern struct tvec_t_base_s boot_tvec_bases; +extern struct tvec_base boot_tvec_bases; #define TIMER_INITIALIZER(_function, _expires, _data) { \ .function = (_function), \ -- cgit v1.3-8-gc7d7 From 1d76c2622813fbc692b0d323028cfef9ee36051a Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 30 Jan 2008 13:30:01 +0100 Subject: clocksource: make CLOCKSOURCE_MASK bullet-proof Signed-off-by: Atsushi Nemoto Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 107787aacb64..07b42153de24 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -103,7 +103,7 @@ struct clocksource { #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 /* simplify initialization of mask field */ -#define CLOCKSOURCE_MASK(bits) (cycle_t)(bits<64 ? ((1ULL< Date: Wed, 30 Jan 2008 13:30:02 +0100 Subject: clocksource: add unregister function to disable unusable clocksources On x86 the PIT might become an unusable clocksource. Add an unregister function to provide a possibilty to remove the PIT from the list of available clock sources. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8253.c | 1 + include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 15 +++++++++++++++ 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 0f8f35458a8f..decc5d294d76 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -13,6 +13,7 @@ #include #include #include +#include DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 07b42153de24..85778a4b1209 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -215,6 +215,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, /* used to install a new clocksource */ extern int clocksource_register(struct clocksource*); +extern void clocksource_unregister(struct clocksource*); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index edd5ef8e1765..6e9259a5d501 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -337,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating) spin_unlock_irqrestore(&clocksource_lock, flags); } +/** + * clocksource_unregister - remove a registered clocksource + */ +void clocksource_unregister(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&clocksource_lock, flags); + list_del(&cs->list); + if (clocksource_override == cs) + clocksource_override = NULL; + next_clocksource = select_clocksource(); + spin_unlock_irqrestore(&clocksource_lock, flags); +} + #ifdef CONFIG_SYSFS /** * sysfs_show_current_clocksources - sysfs interface for current clocksource -- cgit v1.3-8-gc7d7 From e3f37a54f690d3e64995ea7ecea08c5ab3070faf Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Wed, 30 Jan 2008 13:30:03 +0100 Subject: x86: assign IRQs to HPET timers The userspace API for the HPET (see Documentation/hpet.txt) did not work. The HPET_IE_ON ioctl was failing as there was no IRQ assigned to the timer device. This patch fixes it by allocating IRQs to timer blocks in the HPET. arch/x86/kernel/hpet.c | 13 +++++-------- drivers/char/hpet.c | 45 ++++++++++++++++++++++++++++++++++++++------- include/linux/hpet.h | 2 +- 3 files changed, 44 insertions(+), 16 deletions(-) Signed-off-by: Balaji Rao Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 13 +++++-------- drivers/char/hpet.c | 45 ++++++++++++++++++++++++++++++++++++++------- include/linux/hpet.h | 2 +- 3 files changed, 44 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9ec2ab793042..786aa227afdf 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -117,8 +117,7 @@ int is_hpet_enabled(void) static void hpet_reserve_platform_timers(unsigned long id) { struct hpet __iomem *hpet = hpet_virt_address; - struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; - unsigned int nrtimers, i; + unsigned int nrtimers; struct hpet_data hd; nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; @@ -133,16 +132,14 @@ static void hpet_reserve_platform_timers(unsigned long id) #ifdef CONFIG_HPET_EMULATE_RTC hpet_reserve_timer(&hd, 1); #endif - hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; - for (i = 2; i < nrtimers; timer++, i++) - hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; - + /* + * IRQs for the other timers are assigned dynamically + * in hpet_alloc + */ hpet_alloc(&hd); - } #else static void hpet_reserve_platform_timers(unsigned long id) { } diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 4c16778e3f84..593b32cfbc33 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -806,14 +806,14 @@ static unsigned long hpet_calibrate(struct hpets *hpetp) int hpet_alloc(struct hpet_data *hdp) { - u64 cap, mcfg; + u64 cap, mcfg, hpet_config; struct hpet_dev *devp; - u32 i, ntimer; + u32 i, ntimer, irq; struct hpets *hpetp; size_t siz; struct hpet __iomem *hpet; static struct hpets *last = NULL; - unsigned long period; + unsigned long period, irq_bitmap; unsigned long long temp; /* @@ -840,11 +840,41 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_hpet_phys = hdp->hd_phys_address; hpetp->hp_ntimer = hdp->hd_nirqs; + hpet = hpetp->hp_hpet; - for (i = 0; i < hdp->hd_nirqs; i++) - hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i]; + /* Assign IRQs statically for legacy devices */ + hpetp->hp_dev[0].hd_hdwirq = hdp->hd_irq[0]; + hpetp->hp_dev[1].hd_hdwirq = hdp->hd_irq[1]; - hpet = hpetp->hp_hpet; + /* Assign IRQs dynamically for the others */ + for (i = 2, devp = &hpetp->hp_dev[2]; i < hdp->hd_nirqs; i++, devp++) { + struct hpet_timer __iomem *timer; + + timer = &hpet->hpet_timers[devp - hpetp->hp_dev]; + + hpet_config = readq(&timer->hpet_config); + irq_bitmap = (hpet_config & Tn_INT_ROUTE_CAP_MASK) + >> Tn_INT_ROUTE_CAP_SHIFT; + if (!irq_bitmap) + irq = 0; /* No valid IRQ Assignable */ + else { + irq = find_first_bit(&irq_bitmap, 32); + do { + hpet_config |= irq << Tn_INT_ROUTE_CNF_SHIFT; + writeq(hpet_config, &timer->hpet_config); + + /* + * Verify whether we have written a valid + * IRQ number by reading it back again + */ + hpet_config = readq(&timer->hpet_config); + if (irq == (hpet_config & Tn_INT_ROUTE_CNF_MASK) + >> Tn_INT_ROUTE_CNF_SHIFT) + break; /* Success */ + } while ((irq = (find_next_bit(&irq_bitmap, 32, irq)))); + } + hpetp->hp_dev[i].hd_hdwirq = irq; + } cap = readq(&hpet->hpet_cap); @@ -875,7 +905,8 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_which, hdp->hd_phys_address, hpetp->hp_ntimer > 1 ? "s" : ""); for (i = 0; i < hpetp->hp_ntimer; i++) - printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]); + printk("%s %d", i > 0 ? "," : "", + hpetp->hp_dev[i].hd_hdwirq); printk("\n"); printk(KERN_INFO "hpet%u: %u %d-bit timers, %Lu Hz\n", diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 707f7cb9e795..e3c0b2aa944c 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -64,7 +64,7 @@ struct hpet { */ #define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL) -#define Tn_INI_ROUTE_CAP_SHIFT (32UL) +#define Tn_INT_ROUTE_CAP_SHIFT (32UL) #define Tn_FSB_INT_DELCAP_MASK (0x8000UL) #define Tn_FSB_INT_DELCAP_SHIFT (15) #define Tn_FSB_EN_CNF_MASK (0x4000UL) -- cgit v1.3-8-gc7d7 From 6378ddb592158db4b42197f1bc8666228800e379 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Wed, 30 Jan 2008 13:30:04 +0100 Subject: time: track accurate idle time with tick_sched.idle_sleeptime Current idle time in kstat is based on jiffies and is coarse grained. tick_sched.idle_sleeptime is making some attempt to keep track of idle time in a fine grained manner. But, it is not handling the time spent in interrupts fully. Make tick_sched.idle_sleeptime accurate with respect to time spent on handling interrupts and also add tick_sched.idle_lastupdate, which keeps track of last time when idle_sleeptime was updated. This statistics will be crucial for cpufreq-ondemand governor, which can shed some conservative gaurd band that is uses today while setting the frequency. The ondemand changes that uses the exact idle time is coming soon. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 6 +++++ kernel/softirq.c | 7 ++++- kernel/time/tick-sched.c | 70 +++++++++++++++++++++++++++++++++--------------- 3 files changed, 60 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index f4a1395e05ff..0fadf95debe1 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -51,8 +51,10 @@ struct tick_sched { unsigned long idle_jiffies; unsigned long idle_calls; unsigned long idle_sleeps; + int idle_active; ktime_t idle_entrytime; ktime_t idle_sleeptime; + ktime_t idle_lastupdate; ktime_t sleep_length; unsigned long last_jiffies; unsigned long next_jiffies; @@ -103,6 +105,8 @@ extern void tick_nohz_stop_sched_tick(void); extern void tick_nohz_restart_sched_tick(void); extern void tick_nohz_update_jiffies(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern void tick_nohz_stop_idle(int cpu); +extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); # else static inline void tick_nohz_stop_sched_tick(void) { } static inline void tick_nohz_restart_sched_tick(void) { } @@ -113,6 +117,8 @@ static inline ktime_t tick_nohz_get_sleep_length(void) return len; } +static inline void tick_nohz_stop_idle(int cpu) { } +static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; } # endif /* !NO_HZ */ #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index 8fe1ff40102d..d7837d45419e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -280,9 +280,14 @@ asmlinkage void do_softirq(void) */ void irq_enter(void) { +#ifdef CONFIG_NO_HZ + int cpu = smp_processor_id(); + if (idle_cpu(cpu) && !in_interrupt()) + tick_nohz_stop_idle(cpu); +#endif __irq_enter(); #ifdef CONFIG_NO_HZ - if (idle_cpu(smp_processor_id())) + if (idle_cpu(cpu)) tick_nohz_update_jiffies(); #endif } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 49e12f6a4bab..63f24b550695 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -143,6 +143,44 @@ void tick_nohz_update_jiffies(void) local_irq_restore(flags); } +void tick_nohz_stop_idle(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + if (ts->idle_active) { + ktime_t now, delta; + now = ktime_get(); + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + ts->idle_active = 0; + } +} + +static ktime_t tick_nohz_start_idle(int cpu) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, delta; + + now = ktime_get(); + if (ts->idle_active) { + delta = ktime_sub(now, ts->idle_entrytime); + ts->idle_lastupdate = now; + ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); + } + ts->idle_entrytime = now; + ts->idle_active = 1; + return now; +} + +u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + *last_update_time = ktime_to_us(ts->idle_lastupdate); + return ktime_to_us(ts->idle_sleeptime); +} + /** * tick_nohz_stop_sched_tick - stop the idle tick from the idle task * @@ -155,13 +193,14 @@ void tick_nohz_stop_sched_tick(void) unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; unsigned long rt_jiffies; struct tick_sched *ts; - ktime_t last_update, expires, now, delta; + ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; int cpu; local_irq_save(flags); cpu = smp_processor_id(); + now = tick_nohz_start_idle(cpu); ts = &per_cpu(tick_cpu_sched, cpu); /* @@ -193,19 +232,7 @@ void tick_nohz_stop_sched_tick(void) } } - now = ktime_get(); - /* - * When called from irq_exit we need to account the idle sleep time - * correctly. - */ - if (ts->tick_stopped) { - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - } - - ts->idle_entrytime = now; ts->idle_calls++; - /* Read jiffies and the time when jiffies were updated last */ do { seq = read_seqbegin(&xtime_lock); @@ -337,23 +364,22 @@ void tick_nohz_restart_sched_tick(void) int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long ticks; - ktime_t now, delta; + ktime_t now; - if (!ts->tick_stopped) + local_irq_disable(); + tick_nohz_stop_idle(cpu); + + if (!ts->tick_stopped) { + local_irq_enable(); return; + } /* Update jiffies first */ - now = ktime_get(); - - local_irq_disable(); select_nohz_load_balancer(0); + now = ktime_get(); tick_do_update_jiffies64(now); cpu_clear(cpu, nohz_cpu_mask); - /* Account the idle time */ - delta = ktime_sub(now, ts->idle_entrytime); - ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); - /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick -- cgit v1.3-8-gc7d7 From c202f298de59c17c0a9799dc0e1b9e0629347935 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:08 +0100 Subject: x86: clean up arch/x86/ia32/sys_ia32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/ia32/sys_ia32.c | 498 ++++++++++++++++++++++++----------------------- include/linux/compat.h | 4 + 2 files changed, 257 insertions(+), 245 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index bee96d614432..58991abc5b59 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -1,29 +1,29 @@ /* * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on - * sys_sparc32 + * sys_sparc32 * * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger - * Copyright (C) 1999 Arun Sharma - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1999 Arun Sharma + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang - * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) + * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) * * These routines maintain argument size conversion between 32bit and 64bit - * environment. In 2.5 most of this should be moved to a generic directory. + * environment. In 2.5 most of this should be moved to a generic directory. * * This file assumes that there is a hole at the end of user address space. - * - * Some of the functions are LE specific currently. These are hopefully all marked. - * This should be fixed. + * + * Some of the functions are LE specific currently. These are + * hopefully all marked. This should be fixed. */ #include #include -#include -#include +#include +#include #include #include #include @@ -90,43 +90,44 @@ int cp_compat_stat(struct kstat *kbuf, struct compat_stat __user *ubuf) if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino) return -EOVERFLOW; if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) || - __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) || - __put_user (ino, &ubuf->st_ino) || - __put_user (kbuf->mode, &ubuf->st_mode) || - __put_user (kbuf->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || - __put_user (kbuf->size, &ubuf->st_size) || - __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) || - __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (kbuf->blksize, &ubuf->st_blksize) || - __put_user (kbuf->blocks, &ubuf->st_blocks)) + __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) || + __put_user(ino, &ubuf->st_ino) || + __put_user(kbuf->mode, &ubuf->st_mode) || + __put_user(kbuf->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || + __put_user(kbuf->size, &ubuf->st_size) || + __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) || + __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(kbuf->blksize, &ubuf->st_blksize) || + __put_user(kbuf->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_truncate64(char __user *filename, + unsigned long offset_low, + unsigned long offset_high) { return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); } -asmlinkage long -sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, + unsigned long offset_high) { return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } -/* Another set for IA32/LFS -- x86_64 struct stat is different due to - support for 64bit inode numbers. */ - -static int -cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) +/* + * Another set for IA32/LFS -- x86_64 struct stat is different due to + * support for 64bit inode numbers. + */ +static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) { typeof(ubuf->st_uid) uid = 0; typeof(ubuf->st_gid) gid = 0; @@ -134,38 +135,39 @@ cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) SET_GID(gid, stat->gid); if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || - __put_user (stat->ino, &ubuf->__st_ino) || - __put_user (stat->ino, &ubuf->st_ino) || - __put_user (stat->mode, &ubuf->st_mode) || - __put_user (stat->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) || - __put_user (stat->size, &ubuf->st_size) || - __put_user (stat->atime.tv_sec, &ubuf->st_atime) || - __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (stat->blksize, &ubuf->st_blksize) || - __put_user (stat->blocks, &ubuf->st_blocks)) + __put_user(stat->ino, &ubuf->__st_ino) || + __put_user(stat->ino, &ubuf->st_ino) || + __put_user(stat->mode, &ubuf->st_mode) || + __put_user(stat->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) || + __put_user(stat->size, &ubuf->st_size) || + __put_user(stat->atime.tv_sec, &ubuf->st_atime) || + __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(stat->blksize, &ubuf->st_blksize) || + __put_user(stat->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_stat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_stat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); + if (!ret) ret = cp_stat64(statbuf, &stat); return ret; } -asmlinkage long -sys32_lstat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_lstat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -174,8 +176,7 @@ sys32_lstat64(char __user * filename, struct stat64 __user *statbuf) return ret; } -asmlinkage long -sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -184,9 +185,8 @@ sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) return ret; } -asmlinkage long -sys32_fstatat(unsigned int dfd, char __user *filename, - struct stat64 __user* statbuf, int flag) +asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, + struct stat64 __user *statbuf, int flag) { struct kstat stat; int error = -EINVAL; @@ -221,8 +221,7 @@ struct mmap_arg_struct { unsigned int offset; }; -asmlinkage long -sys32_mmap(struct mmap_arg_struct __user *arg) +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) { struct mmap_arg_struct a; struct file *file = NULL; @@ -233,33 +232,33 @@ sys32_mmap(struct mmap_arg_struct __user *arg) return -EFAULT; if (a.offset & ~PAGE_MASK) - return -EINVAL; + return -EINVAL; if (!(a.flags & MAP_ANONYMOUS)) { file = fget(a.fd); if (!file) return -EBADF; } - - mm = current->mm; - down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT); + + mm = current->mm; + down_write(&mm->mmap_sem); + retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, + a.offset>>PAGE_SHIFT); if (file) fput(file); - up_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); return retval; } -asmlinkage long -sys32_mprotect(unsigned long start, size_t len, unsigned long prot) +asmlinkage long sys32_mprotect(unsigned long start, size_t len, + unsigned long prot) { - return sys_mprotect(start,len,prot); + return sys_mprotect(start, len, prot); } -asmlinkage long -sys32_pipe(int __user *fd) +asmlinkage long sys32_pipe(int __user *fd) { int retval; int fds[2]; @@ -269,13 +268,13 @@ sys32_pipe(int __user *fd) goto out; if (copy_to_user(fd, fds, sizeof(fds))) retval = -EFAULT; - out: +out: return retval; } -asmlinkage long -sys32_rt_sigaction(int sig, struct sigaction32 __user *act, - struct sigaction32 __user *oact, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, + struct sigaction32 __user *oact, + unsigned int sigsetsize) { struct k_sigaction new_ka, old_ka; int ret; @@ -291,12 +290,17 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act, if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(restorer, &act->sa_restorer)|| - __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t))) + __get_user(restorer, &act->sa_restorer) || + __copy_from_user(&set32, &act->sa_mask, + sizeof(compat_sigset_t))) return -EFAULT; new_ka.sa.sa_handler = compat_ptr(handler); new_ka.sa.sa_restorer = compat_ptr(restorer); - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] | (((long)set32.sig[7]) << 32); @@ -312,7 +316,10 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); @@ -328,23 +335,26 @@ sys32_rt_sigaction(int sig, struct sigaction32 __user *act, set32.sig[0] = old_ka.sa.sa_mask.sig[0]; } if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t))) + __copy_to_user(&oact->sa_mask, &set32, + sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact) +asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact) { - struct k_sigaction new_ka, old_ka; - int ret; + struct k_sigaction new_ka, old_ka; + int ret; - if (act) { + if (act) { compat_old_sigset_t mask; compat_uptr_t handler, restorer; @@ -359,33 +369,35 @@ sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigacti new_ka.sa.sa_restorer = compat_ptr(restorer); siginitset(&new_ka.sa.sa_mask, mask); - } + } - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - } + } return ret; } -asmlinkage long -sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, + unsigned int sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - + if (set) { - if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + if (copy_from_user(&s32, set, sizeof(compat_sigset_t))) return -EFAULT; switch (_NSIG_WORDS) { case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); @@ -394,13 +406,14 @@ sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigprocmask(how, set ? (sigset_t __user *)&s : NULL, oset ? (sigset_t __user *)&s : NULL, - sigsetsize); - set_fs (old_fs); - if (ret) return ret; + sigsetsize); + set_fs(old_fs); + if (ret) + return ret; if (oset) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -408,52 +421,49 @@ sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return 0; } -static inline long -get_tv32(struct timeval *o, struct compat_timeval __user *i) +static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) { - int err = -EFAULT; - if (access_ok(VERIFY_READ, i, sizeof(*i))) { + int err = -EFAULT; + + if (access_ok(VERIFY_READ, i, sizeof(*i))) { err = __get_user(o->tv_sec, &i->tv_sec); err |= __get_user(o->tv_usec, &i->tv_usec); } - return err; + return err; } -static inline long -put_tv32(struct compat_timeval __user *o, struct timeval *i) +static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) { int err = -EFAULT; - if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { + + if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { err = __put_user(i->tv_sec, &o->tv_sec); err |= __put_user(i->tv_usec, &o->tv_usec); - } - return err; + } + return err; } -extern unsigned int alarm_setitimer(unsigned int seconds); - -asmlinkage long -sys32_alarm(unsigned int seconds) +asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +/* + * Translations due to time_t size differences. Which affects all + * sorts of things, like timeval and itimerval. + */ +asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { if (tv) { struct timeval ktv; + do_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; @@ -465,14 +475,14 @@ sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) return 0; } -asmlinkage long -sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { struct timeval ktv; struct timespec kts; struct timezone ktz; - if (tv) { + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; kts.tv_sec = ktv.tv_sec; @@ -494,8 +504,7 @@ struct sel_arg_struct { unsigned int tvp; }; -asmlinkage long -sys32_old_select(struct sel_arg_struct __user *arg) +asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg) { struct sel_arg_struct a; @@ -505,50 +514,45 @@ sys32_old_select(struct sel_arg_struct __user *arg) compat_ptr(a.exp), compat_ptr(a.tvp)); } -extern asmlinkage long -compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options, - struct compat_rusage *ru); - -asmlinkage long -sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options) +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, + int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); } /* 32-bit timeval and related flotsam. */ -asmlinkage long -sys32_sysfs(int option, u32 arg1, u32 arg2) +asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); } -asmlinkage long -sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) { struct timespec t; int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); - set_fs (old_fs); + set_fs(old_fs); if (put_compat_timespec(&t, interval)) return -EFAULT; return ret; } -asmlinkage long -sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); + + set_fs(KERNEL_DS); ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); - set_fs (old_fs); + set_fs(old_fs); if (!ret) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -556,30 +560,29 @@ sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) +asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, + compat_siginfo_t __user *uinfo) { siginfo_t info; int ret; mm_segment_t old_fs = get_fs(); - + if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); - set_fs (old_fs); + set_fs(old_fs); return ret; } /* These are here just in case some old ia32 binary calls it. */ -asmlinkage long -sys32_pause(void) +asmlinkage long sys32_pause(void) { current->state = TASK_INTERRUPTIBLE; schedule(); @@ -599,25 +602,25 @@ struct sysctl_ia32 { }; -asmlinkage long -sys32_sysctl(struct sysctl_ia32 __user *args32) +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) { struct sysctl_ia32 a32; - mm_segment_t old_fs = get_fs (); + mm_segment_t old_fs = get_fs(); void __user *oldvalp, *newvalp; size_t oldlen; int __user *namep; long ret; - if (copy_from_user(&a32, args32, sizeof (a32))) + if (copy_from_user(&a32, args32, sizeof(a32))) return -EFAULT; /* - * We need to pre-validate these because we have to disable address checking - * before calling do_sysctl() because of OLDLEN but we can't run the risk of the - * user specifying bad addresses here. Well, since we're dealing with 32 bit - * addresses, we KNOW that access_ok() will always succeed, so this is an - * expensive NOP, but so what... + * We need to pre-validate these because we have to disable + * address checking before calling do_sysctl() because of + * OLDLEN but we can't run the risk of the user specifying bad + * addresses here. Well, since we're dealing with 32 bit + * addresses, we KNOW that access_ok() will always succeed, so + * this is an expensive NOP, but so what... */ namep = compat_ptr(a32.name); oldvalp = compat_ptr(a32.oldval); @@ -636,34 +639,34 @@ sys32_sysctl(struct sysctl_ia32 __user *args32) unlock_kernel(); set_fs(old_fs); - if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp))) + if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) return -EFAULT; return ret; } #endif -/* warning: next two assume little endian */ -asmlinkage long -sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +/* warning: next two assume little endian */ +asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +asmlinkage long sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_personality(unsigned long personality) +asmlinkage long sys32_personality(unsigned long personality) { int ret; - if (personality(current->personality) == PER_LINUX32 && + + if (personality(current->personality) == PER_LINUX32 && personality == PER_LINUX) personality = PER_LINUX32; ret = sys_personality(personality); @@ -672,34 +675,33 @@ sys32_personality(unsigned long personality) return ret; } -asmlinkage long -sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) +asmlinkage long sys32_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, s32 count) { mm_segment_t old_fs = get_fs(); int ret; off_t of; - + if (offset && get_user(of, offset)) return -EFAULT; - + set_fs(KERNEL_DS); ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL, count); set_fs(old_fs); - + if (offset && put_user(of, offset)) return -EFAULT; - return ret; } asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct mm_struct *mm = current->mm; unsigned long error; - struct file * file = NULL; + struct file *file = NULL; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -717,36 +719,35 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, return error; } -asmlinkage long sys32_olduname(struct oldold_utsname __user * name) +asmlinkage long sys32_olduname(struct oldold_utsname __user *name) { + char *arch = "x86_64"; int err; if (!name) return -EFAULT; if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) return -EFAULT; - - down_read(&uts_sem); - - err = __copy_to_user(&name->sysname,&utsname()->sysname, - __OLD_UTS_LEN); - err |= __put_user(0,name->sysname+__OLD_UTS_LEN); - err |= __copy_to_user(&name->nodename,&utsname()->nodename, - __OLD_UTS_LEN); - err |= __put_user(0,name->nodename+__OLD_UTS_LEN); - err |= __copy_to_user(&name->release,&utsname()->release, - __OLD_UTS_LEN); - err |= __put_user(0,name->release+__OLD_UTS_LEN); - err |= __copy_to_user(&name->version,&utsname()->version, - __OLD_UTS_LEN); - err |= __put_user(0,name->version+__OLD_UTS_LEN); - { - char *arch = "x86_64"; - if (personality(current->personality) == PER_LINUX32) - arch = "i686"; - - err |= __copy_to_user(&name->machine, arch, strlen(arch)+1); - } + + down_read(&uts_sem); + + err = __copy_to_user(&name->sysname, &utsname()->sysname, + __OLD_UTS_LEN); + err |= __put_user(0, name->sysname+__OLD_UTS_LEN); + err |= __copy_to_user(&name->nodename, &utsname()->nodename, + __OLD_UTS_LEN); + err |= __put_user(0, name->nodename+__OLD_UTS_LEN); + err |= __copy_to_user(&name->release, &utsname()->release, + __OLD_UTS_LEN); + err |= __put_user(0, name->release+__OLD_UTS_LEN); + err |= __copy_to_user(&name->version, &utsname()->version, + __OLD_UTS_LEN); + err |= __put_user(0, name->version+__OLD_UTS_LEN); + + if (personality(current->personality) == PER_LINUX32) + arch = "i686"; + + err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1); up_read(&uts_sem); @@ -755,17 +756,19 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name) return err; } -long sys32_uname(struct old_utsname __user * name) +long sys32_uname(struct old_utsname __user *name) { int err; + if (!name) return -EFAULT; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) + if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); - return err?-EFAULT:0; + + return err ? -EFAULT : 0; } long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) @@ -773,27 +776,28 @@ long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) struct ustat u; mm_segment_t seg; int ret; - - seg = get_fs(); - set_fs(KERNEL_DS); + + seg = get_fs(); + set_fs(KERNEL_DS); ret = sys_ustat(dev, (struct ustat __user *)&u); set_fs(seg); - if (ret >= 0) { - if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || - __put_user((__u32) u.f_tfree, &u32p->f_tfree) || - __put_user((__u32) u.f_tinode, &u32p->f_tfree) || - __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || - __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) - ret = -EFAULT; - } + if (ret < 0) + return ret; + + if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) || + __put_user((__u32) u.f_tfree, &u32p->f_tfree) || + __put_user((__u32) u.f_tinode, &u32p->f_tfree) || + __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || + __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) + ret = -EFAULT; return ret; -} +} asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -814,16 +818,17 @@ asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, { void __user *parent_tid = (void __user *)regs->rdx; void __user *child_tid = (void __user *)regs->rdi; + if (!newsp) newsp = regs->rsp; - return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } /* - * Some system calls that need sign extended arguments. This could be done by a generic wrapper. - */ - -long sys32_lseek (unsigned int fd, int offset, unsigned int whence) + * Some system calls that need sign extended arguments. This could be + * done by a generic wrapper. + */ +long sys32_lseek(unsigned int fd, int offset, unsigned int whence) { return sys_lseek(fd, offset, whence); } @@ -832,49 +837,52 @@ long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); } - -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, + +long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, __u32 len_low, __u32 len_high, int advice) -{ +{ return sys_fadvise64_64(fd, (((u64)offset_high)<<32) | offset_low, (((u64)len_high)<<32) | len_low, - advice); -} + advice); +} long sys32_vm86_warning(void) -{ +{ struct task_struct *me = current; static char lastcomm[sizeof(me->comm)]; + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { - compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", - me->comm); + compat_printk(KERN_INFO + "%s: vm86 mode not supported on 64 bit kernel\n", + me->comm); strncpy(lastcomm, me->comm, sizeof(lastcomm)); - } + } return -ENOSYS; -} +} long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, - char __user * buf, size_t len) + char __user *buf, size_t len) { return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count) +asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, + size_t count) { return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, - unsigned n_low, unsigned n_hi, int flags) + unsigned n_low, unsigned n_hi, int flags) { return sys_sync_file_range(fd, ((u64)off_hi << 32) | off_low, ((u64)n_hi << 32) | n_low, flags); } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, size_t len, - int advice) +asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, + size_t len, int advice) { return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, len, advice); diff --git a/include/linux/compat.h b/include/linux/compat.h index 0e69d2cf14aa..ba29d4c59643 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -191,6 +191,10 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timeval __user *tvp); +asmlinkage long compat_sys_wait4(compat_pid_t pid, + compat_uint_t *stat_addr, int options, + struct compat_rusage *ru); + #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) \ -- cgit v1.3-8-gc7d7 From 70a20025632ca320316b5068326784d07c8ff351 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:18 +0100 Subject: x86: move pmtmr related declarations Move more stuff out of proto.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 + arch/x86/kernel/pmtimer_64.c | 4 ++-- include/asm-x86/proto.h | 9 --------- include/linux/acpi_pmtmr.h | 2 ++ 4 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 3de3764a862c..0cb14d4c2c5c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c index ae8f91214f15..b112406f1996 100644 --- a/arch/x86/kernel/pmtimer_64.c +++ b/arch/x86/kernel/pmtimer_64.c @@ -19,13 +19,13 @@ #include #include #include +#include + #include #include #include #include -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - static inline u32 cyc2us(u32 cycles) { /* The Power Management Timer ticks at 3.579545 ticks per microsecond. diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h index a47e526716f4..9074aa7ebc65 100644 --- a/include/asm-x86/proto.h +++ b/include/asm-x86/proto.h @@ -25,15 +25,6 @@ extern void ia32_sysenter_target(void); extern void config_acpi_tables(void); extern void ia32_syscall(void); -extern int pmtimer_mark_offset(void); -extern void pmtimer_resume(void); -extern void pmtimer_wait(unsigned); -extern unsigned int do_gettimeoffset_pm(void); -#ifdef CONFIG_X86_PM_TIMER -extern u32 pmtmr_ioport; -#else -#define pmtmr_ioport 0 -#endif extern int nohpet; extern void reserve_bootmem_generic(unsigned long phys, unsigned len); diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 1d0ef1ae8036..7e3d2859be50 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -25,6 +25,8 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } +extern void pmtimer_wait(unsigned); + #else static inline u32 acpi_pm_read_early(void) -- cgit v1.3-8-gc7d7 From 02456c708eab4515121e5e581422fa3be14368d1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:27 +0100 Subject: x86: nuke a ton of dead hpet code No users, just ballast Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- drivers/char/hpet.c | 75 ---------------------------------------------------- include/linux/hpet.h | 3 --- 2 files changed, 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 22f5fd02ea87..465ad35ed38f 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -600,63 +600,6 @@ static int hpet_is_known(struct hpet_data *hdp) return 0; } -EXPORT_SYMBOL(hpet_alloc); -EXPORT_SYMBOL(hpet_register); -EXPORT_SYMBOL(hpet_unregister); -EXPORT_SYMBOL(hpet_control); - -int hpet_register(struct hpet_task *tp, int periodic) -{ - unsigned int i; - u64 mask; - struct hpet_timer __iomem *timer; - struct hpet_dev *devp; - struct hpets *hpetp; - - switch (periodic) { - case 1: - mask = Tn_PER_INT_CAP_MASK; - break; - case 0: - mask = 0; - break; - default: - return -EINVAL; - } - - tp->ht_opaque = NULL; - - spin_lock_irq(&hpet_task_lock); - spin_lock(&hpet_lock); - - for (devp = NULL, hpetp = hpets; hpetp && !devp; hpetp = hpetp->hp_next) - for (timer = hpetp->hp_hpet->hpet_timers, i = 0; - i < hpetp->hp_ntimer; i++, timer++) { - if ((readq(&timer->hpet_config) & Tn_PER_INT_CAP_MASK) - != mask) - continue; - - devp = &hpetp->hp_dev[i]; - - if (devp->hd_flags & HPET_OPEN || devp->hd_task) { - devp = NULL; - continue; - } - - tp->ht_opaque = devp; - devp->hd_task = tp; - break; - } - - spin_unlock(&hpet_lock); - spin_unlock_irq(&hpet_task_lock); - - if (tp->ht_opaque) - return 0; - else - return -EBUSY; -} - static inline int hpet_tpcheck(struct hpet_task *tp) { struct hpet_dev *devp; @@ -706,24 +649,6 @@ int hpet_unregister(struct hpet_task *tp) return 0; } -int hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg) -{ - struct hpet_dev *devp; - int err; - - if ((err = hpet_tpcheck(tp))) - return err; - - spin_lock_irq(&hpet_lock); - devp = tp->ht_opaque; - if (devp->hd_task != tp) { - spin_unlock_irq(&hpet_lock); - return -ENXIO; - } - spin_unlock_irq(&hpet_lock); - return hpet_ioctl_common(devp, cmd, arg, 1); -} - static ctl_table hpet_table[] = { { .ctl_name = CTL_UNNUMBERED, diff --git a/include/linux/hpet.h b/include/linux/hpet.h index e3c0b2aa944c..9cd94bfd07e5 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -115,9 +115,6 @@ static inline void hpet_reserve_timer(struct hpet_data *hd, int timer) } int hpet_alloc(struct hpet_data *); -int hpet_register(struct hpet_task *, int); -int hpet_unregister(struct hpet_task *); -int hpet_control(struct hpet_task *, unsigned int, unsigned long); #endif /* __KERNEL__ */ -- cgit v1.3-8-gc7d7 From c9cce83dd1d59f52e2c8f8c7d265ba4854c40785 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Wed, 30 Jan 2008 13:30:32 +0100 Subject: x86: remove extern declarations for code, data, bss resources This patch removes the extern struct resource declarations for data_resource, code_resource and bss_resource on x86 and declares that three structures as static as done on other architectures like IA64. On i386, these structures are moved to setup_32.c (from e820_32.c) because that's code that is not specific to e820 and also required on EFI systems. That makes the "extern" reference superfluous. On x86_64, data_resource, code_resource and bss_resource are passed to e820_reserve_resources() as arguments just as done on i386 and IA64. That also avoids the "extern" reference and it's possible to make it static. Signed-off-by: Bernhard Walle Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_32.c | 110 ++------------------------------------------- arch/x86/kernel/e820_64.c | 11 +++-- arch/x86/kernel/setup_32.c | 106 +++++++++++++++++++++++++++++++++++++++++-- arch/x86/kernel/setup_64.c | 8 ++-- include/asm-x86/e820_32.h | 6 +++ include/asm-x86/e820_64.h | 5 ++- include/linux/ioport.h | 2 + 7 files changed, 127 insertions(+), 121 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 18f500d185a2..87cadc86d5ee 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -37,26 +37,6 @@ unsigned long pci_mem_start = 0x10000000; EXPORT_SYMBOL(pci_mem_start); #endif extern int user_defined_memmap; -struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; static struct resource system_rom_resource = { .name = "System ROM", @@ -111,60 +91,6 @@ static struct resource video_rom_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM }; -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - #define ROMSIGNATURE 0xaa55 static int __init romsignature(const unsigned char *rom) @@ -260,10 +186,9 @@ static void __init probe_roms(void) * Request address space for all standard RAM and ROM resources * and also for regions reported as reserved by the e820. */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource) +void __init legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -305,35 +230,6 @@ legacy_init_iomem_resources(struct resource *code_resource, } } -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, - &data_resource, &bss_resource); - else - legacy_init_iomem_resources(&code_resource, - &data_resource, &bss_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); - #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) /** * e820_mark_nosave_regions - Find the ranges of physical addresses that do not diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 11a3d65db0c1..151236896243 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -47,8 +47,6 @@ unsigned long end_pfn_map; */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource, bss_resource; - /* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) { @@ -213,7 +211,8 @@ unsigned long __init e820_end_of_ram(void) /* * Mark e820 reserved areas as busy for the resource manager. */ -void __init e820_reserve_resources(void) +void __init e820_reserve_resources(struct resource *code_resource, + struct resource *data_resource, struct resource *bss_resource) { int i; for (i = 0; i < e820.nr_map; i++) { @@ -235,9 +234,9 @@ void __init e820_reserve_resources(void) * so we try it repeatedly and let the resource manager * test it. */ - request_resource(res, &code_resource); - request_resource(res, &data_resource); - request_resource(res, &bss_resource); + request_resource(res, code_resource); + request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) request_resource(res, &crashk_res); diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 236d30b264d8..32edf70d6b0d 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -73,9 +73,80 @@ int disable_pse __cpuinitdata = 0; /* * Machine setup.. */ -extern struct resource code_resource; -extern struct resource data_resource; -extern struct resource bss_resource; +static struct resource data_resource = { + .name = "Kernel data", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource video_ram_resource = { + .name = "Video RAM area", + .start = 0xa0000, + .end = 0xbffff, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource standard_io_resources[] = { { + .name = "dma1", + .start = 0x0000, + .end = 0x001f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic1", + .start = 0x0020, + .end = 0x0021, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "timer0", + .start = 0x0040, + .end = 0x0043, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "timer1", + .start = 0x0050, + .end = 0x0053, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "keyboard", + .start = 0x0060, + .end = 0x006f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma page reg", + .start = 0x0080, + .end = 0x008f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic2", + .start = 0x00a0, + .end = 0x00a1, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma2", + .start = 0x00c0, + .end = 0x00df, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "fpu", + .start = 0x00f0, + .end = 0x00ff, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +} }; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -693,3 +764,32 @@ void __init setup_arch(char **cmdline_p) #endif #endif } + +/* + * Request address space for all standard resources + * + * This is called just before pcibios_init(), which is also a + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). + */ +static int __init request_standard_resources(void) +{ + int i; + + printk(KERN_INFO "Setting up standard PCI resources\n"); + if (efi_enabled) + efi_initialize_iomem_resources(&code_resource, + &data_resource, &bss_resource); + else + legacy_init_iomem_resources(&code_resource, + &data_resource, &bss_resource); + + /* EFI systems may still have VGA */ + request_resource(&iomem_resource, &video_ram_resource); + + /* request I/O space for devices used on all i[345]86 PCs */ + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +subsys_initcall(request_standard_resources); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index bcb5f3aaa097..1acb435a0585 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -123,19 +123,19 @@ struct resource standard_io_resources[] = { #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) -struct resource data_resource = { +static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, .flags = IORESOURCE_RAM, }; -struct resource code_resource = { +static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, .flags = IORESOURCE_RAM, }; -struct resource bss_resource = { +static struct resource bss_resource = { .name = "Kernel bss", .start = 0, .end = 0, @@ -438,7 +438,7 @@ void __init setup_arch(char **cmdline_p) /* * We trust e820 completely. No explicit ROM probing in memory. */ - e820_reserve_resources(); + e820_reserve_resources(&code_resource, &data_resource, &bss_resource); e820_mark_nosave_regions(); { diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index 03f60c690c8a..ae5ea19623fa 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -12,6 +12,8 @@ #ifndef __E820_HEADER #define __E820_HEADER +#include + #define HIGH_MEMORY (1024*1024) #ifndef __ASSEMBLY__ @@ -26,6 +28,9 @@ extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); extern void print_memory_map(char *who); +extern void legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource); #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) extern void e820_mark_nosave_regions(void); @@ -35,5 +40,6 @@ static inline void e820_mark_nosave_regions(void) } #endif + #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index e535e6044e21..1c7ba8804176 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -11,6 +11,8 @@ #ifndef __E820_HEADER #define __E820_HEADER +#include + #ifndef __ASSEMBLY__ extern unsigned long find_e820_area(unsigned long start, unsigned long end, unsigned size); @@ -19,7 +21,8 @@ extern void add_memory_region(unsigned long start, unsigned long size, extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); -extern void e820_reserve_resources(void); +extern void e820_reserve_resources(struct resource *code_resource, + struct resource *data_resource, struct resource *bss_resource); extern void e820_mark_nosave_regions(void); extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6187a8567bc7..605d237364d2 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -8,6 +8,7 @@ #ifndef _LINUX_IOPORT_H #define _LINUX_IOPORT_H +#ifndef __ASSEMBLY__ #include #include /* @@ -153,4 +154,5 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ -- cgit v1.3-8-gc7d7 From fb7fa8f1741c91f6c6e958762155abe9339476ca Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:47 +0100 Subject: ptrace: arch_has_single_step This defines the new macro arch_has_single_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. It declares the new user_enable_single_step and user_disable_single_step functions. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ptrace.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 3ea5750a0f7e..8f06c6fb22a5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -129,6 +129,52 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); #define force_successful_syscall_return() do { } while (0) #endif +/* + * should define the following things inside #ifdef __KERNEL__. + * + * These do-nothing inlines are used when the arch does not + * implement single-step. The kerneldoc comments are here + * to document the interface for all arch definitions. + */ + +#ifndef arch_has_single_step +/** + * arch_has_single_step - does this CPU support user-mode single-step? + * + * If this is defined, then there must be function declarations or + * inlines for user_enable_single_step() and user_disable_single_step(). + * arch_has_single_step() should evaluate to nonzero iff the machine + * supports instruction single-step for user mode. + * It can be a constant or it can test a CPU feature bit. + */ +#define arch_has_single_step() (0) + +/** + * user_enable_single_step - single-step in user-mode task + * @task: either current or a task stopped in %TASK_TRACED + * + * This can only be called when arch_has_single_step() has returned nonzero. + * Set @task so that when it returns to user mode, it will trap after the + * next single instruction executes. + */ +static inline void user_enable_single_step(struct task_struct *task) +{ + BUG(); /* This can never be called. */ +} + +/** + * user_disable_single_step - cancel user-mode single-step + * @task: either current or a task stopped in %TASK_TRACED + * + * Clear @task of the effects of user_enable_single_step(). This can + * be called whether or not user_enable_single_step() was ever called + * on @task, and even if arch_has_single_step() returned zero. + */ +static inline void user_disable_single_step(struct task_struct *task) +{ +} +#endif /* arch_has_single_step */ + #endif #endif -- cgit v1.3-8-gc7d7 From dc802c2d2e66e2d1544e023bfd4be6cdee48d57b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:53 +0100 Subject: ptrace: arch_has_block_step This defines the new macro arch_has_block_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. This is the analog of arch_has_single_step() for step-until-branch on machines that have it. It declares the new user_enable_block_step function, which goes with the existing user_enable_single_step and user_disable_single_step. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ptrace.h | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 8f06c6fb22a5..1febc541dda5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -155,7 +155,8 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); * * This can only be called when arch_has_single_step() has returned nonzero. * Set @task so that when it returns to user mode, it will trap after the - * next single instruction executes. + * next single instruction executes. If arch_has_block_step() is defined, + * this must clear the effects of user_enable_block_step() too. */ static inline void user_enable_single_step(struct task_struct *task) { @@ -166,15 +167,43 @@ static inline void user_enable_single_step(struct task_struct *task) * user_disable_single_step - cancel user-mode single-step * @task: either current or a task stopped in %TASK_TRACED * - * Clear @task of the effects of user_enable_single_step(). This can - * be called whether or not user_enable_single_step() was ever called - * on @task, and even if arch_has_single_step() returned zero. + * Clear @task of the effects of user_enable_single_step() and + * user_enable_block_step(). This can be called whether or not either + * of those was ever called on @task, and even if arch_has_single_step() + * returned zero. */ static inline void user_disable_single_step(struct task_struct *task) { } #endif /* arch_has_single_step */ +#ifndef arch_has_block_step +/** + * arch_has_block_step - does this CPU support user-mode block-step? + * + * If this is defined, then there must be a function declaration or inline + * for user_enable_block_step(), and arch_has_single_step() must be defined + * too. arch_has_block_step() should evaluate to nonzero iff the machine + * supports step-until-branch for user mode. It can be a constant or it + * can test a CPU feature bit. + */ +#define arch_has_single_step() (0) + +/** + * user_enable_block_step - step until branch in user-mode task + * @task: either current or a task stopped in %TASK_TRACED + * + * This can only be called when arch_has_block_step() has returned nonzero, + * and will never be called when single-instruction stepping is being used. + * Set @task so that when it returns to user mode, it will trap after the + * next branch or trap taken. + */ +static inline void user_enable_block_step(struct task_struct *task) +{ + BUG(); /* This can never be called. */ +} +#endif /* arch_has_block_step */ + #endif #endif -- cgit v1.3-8-gc7d7 From 5b88abbf770a0e1975c668743100f42934f385e8 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:53 +0100 Subject: ptrace: generic PTRACE_SINGLEBLOCK This makes ptrace_request handle PTRACE_SINGLEBLOCK along with PTRACE_CONT et al. The new generic code makes use of the arch_has_block_step macro and generic entry points on machines that define them. [ mingo@elte.hu: bugfix ] Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ptrace.h | 2 +- kernel/ptrace.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 1febc541dda5..515bff053de8 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -187,7 +187,7 @@ static inline void user_disable_single_step(struct task_struct *task) * supports step-until-branch for user mode. It can be a constant or it * can test a CPU feature bit. */ -#define arch_has_single_step() (0) +#define arch_has_block_step() (0) /** * user_enable_block_step - step until branch in user-mode task diff --git a/kernel/ptrace.c b/kernel/ptrace.c index fad5f1e8511f..973d727f5e84 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -373,6 +373,12 @@ static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data) #define is_singlestep(request) 0 #endif +#ifdef PTRACE_SINGLEBLOCK +#define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK) +#else +#define is_singleblock(request) 0 +#endif + #ifdef PTRACE_SYSEMU #define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP) #else @@ -396,7 +402,11 @@ static int ptrace_resume(struct task_struct *child, long request, long data) clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - if (is_singlestep(request) || is_sysemu_singlestep(request)) { + if (is_singleblock(request)) { + if (unlikely(!arch_has_block_step())) + return -EIO; + user_enable_block_step(child); + } else if (is_singlestep(request) || is_sysemu_singlestep(request)) { if (unlikely(!arch_has_single_step())) return -EIO; user_enable_single_step(child); @@ -438,6 +448,9 @@ int ptrace_request(struct task_struct *child, long request, #ifdef PTRACE_SINGLESTEP case PTRACE_SINGLESTEP: #endif +#ifdef PTRACE_SINGLEBLOCK + case PTRACE_SINGLEBLOCK: +#endif #ifdef PTRACE_SYSEMU case PTRACE_SYSEMU: case PTRACE_SYSEMU_SINGLESTEP: -- cgit v1.3-8-gc7d7 From 5548fecdff5617ba3a2f09f0e585e1ac6e1bd25c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 30 Jan 2008 13:30:55 +0100 Subject: x86: clean up bitops-related warnings Add casts to appropriate places to silence spurious bitops warnings. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup_64.c | 31 ++++++++++++++++--------------- arch/x86/kernel/smpboot_64.c | 4 ++-- arch/x86/mm/numa_64.c | 2 +- include/asm-x86/cpufeature.h | 2 +- include/asm-x86/numa_64.h | 2 +- include/linux/thread_info.h | 10 +++++----- 6 files changed, 26 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 84f66b7b4d2e..63dd39b843b5 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -661,19 +661,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, &c->x86_capability); + clear_bit(0*32+31, (unsigned long *)&c->x86_capability); /* On C+ stepping K8 rep microcode works well for copy/memset */ level = cpuid_eax(1); if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) - set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability); if (c->x86 == 0x10 || c->x86 == 0x11) - set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability); /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) - set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); + set_bit(X86_FEATURE_FXSAVE_LEAK, (unsigned long *)&c->x86_capability); level = get_model_name(c); if (!level) { @@ -689,7 +689,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ if (c->x86_power & (1<<8)) - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability); /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) @@ -702,14 +702,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) - set_bit(X86_FEATURE_K8, &c->x86_capability); + set_bit(X86_FEATURE_K8, (unsigned long *)&c->x86_capability); /* RDTSC can be speculated around */ - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability); /* Family 10 doesn't support C states in MWAIT so don't use it */ if (c->x86 == 0x10 && !force_mwait) - clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); + clear_bit(X86_FEATURE_MWAIT, (unsigned long *)&c->x86_capability); if (amd_apic_timer_broken()) disable_apic_timer = 1; @@ -811,16 +811,17 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); + set_bit(X86_FEATURE_ARCH_PERFMON, + (unsigned long *)&c->x86_capability); } if (cpu_has_ds) { unsigned int l1, l2; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) - set_bit(X86_FEATURE_BTS, c->x86_capability); + set_bit(X86_FEATURE_BTS, (unsigned long *)c->x86_capability); if (!(l1 & (1<<12))) - set_bit(X86_FEATURE_PEBS, c->x86_capability); + set_bit(X86_FEATURE_PEBS, (unsigned long *)c->x86_capability); } n = c->extended_cpuid_level; @@ -839,13 +840,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size * 2; if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + set_bit(X86_FEATURE_CONSTANT_TSC, (unsigned long *)&c->x86_capability); if (c->x86 == 6) - set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + set_bit(X86_FEATURE_REP_GOOD, (unsigned long *)&c->x86_capability); if (c->x86 == 15) - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + set_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability); else - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + clear_bit(X86_FEATURE_SYNC_RDTSC, (unsigned long *)&c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 8ac8eb620428..ac1089f2b917 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -691,7 +691,7 @@ do_rest: } if (boot_error) { cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ + clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */ clear_node_cpumask(cpu); /* was set by numa_add_cpu */ cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, cpu_possible_map); @@ -1036,7 +1036,7 @@ void remove_cpu_from_maps(void) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); - clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ + clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */ clear_node_cpumask(cpu); } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 46b4b5e1a02a..848231481619 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -547,7 +547,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) __cpuinit void numa_add_cpu(int cpu) { - set_bit(cpu, &node_to_cpumask_map[cpu_to_node(cpu)]); + set_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); } void __cpuinit numa_set_node(int cpu, int node) diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index acbf6681740d..761922972f6f 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -124,7 +124,7 @@ (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \ (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ) \ ? 1 : \ - test_bit(bit, (c)->x86_capability)) + test_bit(bit, (unsigned long *)(c)->x86_capability)) #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index c3c20db1fba3..4449889bb0c0 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -32,7 +32,7 @@ extern void __init init_cpu_to_node(void); static inline void clear_node_cpumask(int cpu) { - clear_bit(cpu, &node_to_cpumask_map[cpu_to_node(cpu)]); + clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); } #else diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9c4ad755d7e5..dfbdfb9836f4 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,27 +42,27 @@ extern long do_no_restart_syscall(struct restart_block *parm); static inline void set_ti_thread_flag(struct thread_info *ti, int flag) { - set_bit(flag,&ti->flags); + set_bit(flag, (unsigned long *)&ti->flags); } static inline void clear_ti_thread_flag(struct thread_info *ti, int flag) { - clear_bit(flag,&ti->flags); + clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag) { - return test_and_set_bit(flag,&ti->flags); + return test_and_set_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) { - return test_and_clear_bit(flag,&ti->flags); + return test_and_clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_ti_thread_flag(struct thread_info *ti, int flag) { - return test_bit(flag,&ti->flags); + return test_bit(flag, (unsigned long *)&ti->flags); } #define set_thread_flag(flag) \ -- cgit v1.3-8-gc7d7 From 2355188570790930718fb72444cddc2959039d9d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:31:10 +0100 Subject: x86: avoid build warning fix this build warning: include/asm/topology_32.h: In function 'node_to_first_cpu': include/asm/topology_32.h:66: warning: unused variable 'mask' Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/cpumask.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 85bd790c201e..7047f58306a7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -218,8 +218,8 @@ int __first_cpu(const cpumask_t *srcp); int __next_cpu(int n, const cpumask_t *srcp); #define next_cpu(n, src) __next_cpu((n), &(src)) #else -#define first_cpu(src) 0 -#define next_cpu(n, src) 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) #endif #define cpumask_of_cpu(cpu) \ -- cgit v1.3-8-gc7d7 From 95c354fe9f7d6decc08a92aa26eb233ecc2155bf Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 30 Jan 2008 13:31:20 +0100 Subject: spinlock: lockbreak cleanup The break_lock data structure and code for spinlocks is quite nasty. Not only does it double the size of a spinlock but it changes locking to a potentially less optimal trylock. Put all of that under CONFIG_GENERIC_LOCKBREAK, and introduce a __raw_spin_is_contended that uses the lock data itself to determine whether there are waiters on the lock, to be used if CONFIG_GENERIC_LOCKBREAK is not set. Rename need_lockbreak to spin_needbreak, make it use spin_is_contended to decouple it from the spinlock implementation, and make it typesafe (rwlocks do not have any need_lockbreak sites -- why do they even get bloated up with that break_lock then?). Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/arm/Kconfig | 5 +++++ arch/ia64/Kconfig | 5 +++++ arch/m32r/Kconfig | 5 +++++ arch/mips/Kconfig | 5 +++++ arch/parisc/Kconfig | 5 +++++ arch/powerpc/Kconfig | 5 +++++ arch/sparc64/Kconfig | 5 +++++ arch/x86/Kconfig | 4 ++++ fs/jbd/checkpoint.c | 3 ++- fs/jbd/commit.c | 2 +- fs/jbd2/checkpoint.c | 3 ++- fs/jbd2/commit.c | 2 +- include/linux/sched.h | 21 +++++++-------------- include/linux/spinlock.h | 6 ++++++ include/linux/spinlock_types.h | 4 ++-- include/linux/spinlock_up.h | 2 ++ kernel/sched.c | 16 ++++++---------- kernel/spinlock.c | 3 +-- mm/memory.c | 8 +++----- 19 files changed, 72 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index de211ac3853e..77201d3f7479 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -91,6 +91,11 @@ config GENERIC_IRQ_PROBE bool default y +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index bef47725d4ad..4a81b7fb191a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -42,6 +42,11 @@ config MMU config SWIOTLB bool +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_XCHGADD_ALGORITHM bool default y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index ab9a264cb194..f7237c5f531e 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -235,6 +235,11 @@ config IRAM_SIZE # Define implied options from the CPU selection here # +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool depends on M32R diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 6b0f85f02c79..4fad0a34b997 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -694,6 +694,11 @@ source "arch/mips/vr41xx/Kconfig" endmenu +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b8ef1787a191..2b649c46631c 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -19,6 +19,11 @@ config MMU config STACK_GROWSUP def_bool y +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 232c298c933f..c17a194beb0e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -53,6 +53,11 @@ config RWSEM_XCHGADD_ALGORITHM bool default y +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config ARCH_HAS_ILOG2_U32 bool default y diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 10b212a1f9f5..1e25bce0366d 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -200,6 +200,11 @@ config US2E_FREQ If in doubt, say N. # Global things across all Sun machines. +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 23936301db56..db434f8171d3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -19,6 +19,10 @@ config X86_64 config X86 def_bool y +config GENERIC_LOCKBREAK + def_bool y + depends on SMP && PREEMPT + config GENERIC_TIME def_bool y diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 0f69c416eebc..a5432bbbfb88 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -347,7 +347,8 @@ restart: break; } retry = __process_buffer(journal, jh, bhs,&batch_count); - if (!retry && lock_need_resched(&journal->j_list_lock)){ + if (!retry && (need_resched() || + spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); retry = 1; break; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 610264b99a8e..31853eb65b4c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -265,7 +265,7 @@ write_out_data: put_bh(bh); } - if (lock_need_resched(&journal->j_list_lock)) { + if (need_resched() || spin_needbreak(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1b7f282c1ae9..6914598022ce 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -353,7 +353,8 @@ restart: } retry = __process_buffer(journal, jh, bhs, &batch_count, transaction); - if (!retry && lock_need_resched(&journal->j_list_lock)){ + if (!retry && (need_resched() || + spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); retry = 1; break; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index da8d0eb3b7b9..4f302d279279 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -341,7 +341,7 @@ write_out_data: put_bh(bh); } - if (lock_need_resched(&journal->j_list_lock)) { + if (need_resched() || spin_needbreak(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 2d0546e884ea..9d4797609aa5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1922,23 +1922,16 @@ extern int cond_resched_softirq(void); /* * Does a critical section need to be broken due to another - * task waiting?: + * task waiting?: (technically does not depend on CONFIG_PREEMPT, + * but a general need for low latency) */ -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -# define need_lockbreak(lock) ((lock)->break_lock) -#else -# define need_lockbreak(lock) 0 -#endif - -/* - * Does a critical section need to be broken due to another - * task waiting or preemption being signalled: - */ -static inline int lock_need_resched(spinlock_t *lock) +static inline int spin_needbreak(spinlock_t *lock) { - if (need_lockbreak(lock) || need_resched()) - return 1; +#ifdef CONFIG_PREEMPT + return spin_is_contended(lock); +#else return 0; +#endif } /* diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index c376f3b36c89..124449733c55 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -120,6 +120,12 @@ do { \ #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) +#ifdef CONFIG_GENERIC_LOCKBREAK +#define spin_is_contended(lock) ((lock)->break_lock) +#else +#define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#endif + /** * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index f6a3a951b79e..68d88f71f1a2 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -19,7 +19,7 @@ typedef struct { raw_spinlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -35,7 +35,7 @@ typedef struct { typedef struct { raw_rwlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index ea54c4c9a4ec..938234c4a996 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -64,6 +64,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ +#define __raw_spin_is_contended(lock) (((void)(lock), 0)) + #define __raw_read_can_lock(lock) (((void)(lock), 1)) #define __raw_write_can_lock(lock) (((void)(lock), 1)) diff --git a/kernel/sched.c b/kernel/sched.c index 524285e46fa7..ba4c88088f62 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4945,19 +4945,15 @@ EXPORT_SYMBOL(_cond_resched); */ int cond_resched_lock(spinlock_t *lock) { + int resched = need_resched() && system_state == SYSTEM_RUNNING; int ret = 0; - if (need_lockbreak(lock)) { + if (spin_needbreak(lock) || resched) { spin_unlock(lock); - cpu_relax(); - ret = 1; - spin_lock(lock); - } - if (need_resched() && system_state == SYSTEM_RUNNING) { - spin_release(&lock->dep_map, 1, _THIS_IP_); - _raw_spin_unlock(lock); - preempt_enable_no_resched(); - __cond_resched(); + if (resched && need_resched()) + __cond_resched(); + else + cpu_relax(); ret = 1; spin_lock(lock); } diff --git a/kernel/spinlock.c b/kernel/spinlock.c index cd72424c2662..ae28c8245123 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock); * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are * not re-enabled during lock-acquire (which the preempt-spin-ops do): */ -#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \ - defined(CONFIG_DEBUG_LOCK_ALLOC) +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) void __lockfunc _read_lock(rwlock_t *lock) { diff --git a/mm/memory.c b/mm/memory.c index 4b0144b24c12..673ebbf499c7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -513,8 +513,7 @@ again: if (progress >= 32) { progress = 0; if (need_resched() || - need_lockbreak(src_ptl) || - need_lockbreak(dst_ptl)) + spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) break; } if (pte_none(*src_pte)) { @@ -853,7 +852,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, tlb_finish_mmu(*tlbp, tlb_start, start); if (need_resched() || - (i_mmap_lock && need_lockbreak(i_mmap_lock))) { + (i_mmap_lock && spin_needbreak(i_mmap_lock))) { if (i_mmap_lock) { *tlbp = NULL; goto out; @@ -1768,8 +1767,7 @@ again: restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr, details); - need_break = need_resched() || - need_lockbreak(details->i_mmap_lock); + need_break = need_resched() || spin_needbreak(details->i_mmap_lock); if (restart_addr >= end_addr) { /* We have now completed this vma: mark it so */ -- cgit v1.3-8-gc7d7 From cae4595764cb3b08f6517e99bac1e3862854b1a1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 30 Jan 2008 13:31:23 +0100 Subject: x86: make __{save,restore}_processor_state static .. allowing to remove their declarations from a global include file (the symbols don't exist for anything but x86). Likewise for 64-bits' fix_processor_context(), just that that one was properly declared in an arch-specific header. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/suspend_64.c | 8 +++++--- arch/x86/power/cpu.c | 4 ++-- include/asm-x86/suspend_64.h | 2 -- include/linux/suspend.h | 3 --- 4 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index 279c25775d19..09199511c256 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c @@ -17,6 +17,8 @@ /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; +static void fix_processor_context(void); + struct saved_context saved_context; /** @@ -34,7 +36,7 @@ struct saved_context saved_context; * needed by kernel A, so that it can operate correctly after the resume * regardless of what kernel B does in the meantime. */ -void __save_processor_state(struct saved_context *ctxt) +static void __save_processor_state(struct saved_context *ctxt) { kernel_fpu_begin(); @@ -89,7 +91,7 @@ static void do_fpu_end(void) * by __save_processor_state() * @ctxt - structure to load the registers contents from */ -void __restore_processor_state(struct saved_context *ctxt) +static void __restore_processor_state(struct saved_context *ctxt) { /* * control registers @@ -133,7 +135,7 @@ void restore_processor_state(void) __restore_processor_state(&saved_context); } -void fix_processor_context(void) +static void fix_processor_context(void) { int cpu = smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 5a98dc35addf..efcf620d1439 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -19,7 +19,7 @@ unsigned long saved_context_esp, saved_context_ebp; unsigned long saved_context_esi, saved_context_edi; unsigned long saved_context_eflags; -void __save_processor_state(struct saved_context *ctxt) +static void __save_processor_state(struct saved_context *ctxt) { mtrr_save_fixed_ranges(NULL); kernel_fpu_begin(); @@ -86,7 +86,7 @@ static void fix_processor_context(void) } -void __restore_processor_state(struct saved_context *ctxt) +static void __restore_processor_state(struct saved_context *ctxt) { /* * control registers diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h index 4404668f9aa4..2eb92cb81a0d 100644 --- a/include/asm-x86/suspend_64.h +++ b/include/asm-x86/suspend_64.h @@ -45,8 +45,6 @@ struct saved_context { #define loaddebug(thread,register) \ set_debugreg((thread)->debugreg##register, register) -extern void fix_processor_context(void); - /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); extern char core_restore_code; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4360e0816956..40280df2a3db 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -211,9 +211,6 @@ static inline int hibernate(void) { return -ENOSYS; } #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); -struct saved_context; -void __save_processor_state(struct saved_context *ctxt); -void __restore_processor_state(struct saved_context *ctxt); /* kernel/power/main.c */ extern struct blocking_notifier_head pm_chain_head; -- cgit v1.3-8-gc7d7 From bdf88217b70dbb18c4ee27a6c497286e040a6705 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:44 +0100 Subject: x86: user_regset header The new header defines the types struct user_regset and struct user_regset_view, with some associated declarations. This new set of interfaces will become the standard way for arch code to expose user-mode machine-specific state. A single set of entry points into arch code can do all the low-level work in one place to fill the needs of core dumps, ptrace, and any other user-mode debugging facilities that might come along in the future. For existing arch code to adapt to the user_regset interfaces, each arch can work from the code it already has to support core files and ptrace. The formats you want for user_regset are the core file formats. The only wrinkle in adapting old ptrace implementation code as user_regset get and set functions is that these functions can be called on current as well as on another task_struct that is stopped and switched out as for ptrace. For some kinds of machine state, you may have to load it directly from CPU registers or otherwise differently for current than for another thread. (Your core dump support already handles this in elf_core_copy_regs for current and elf_core_copy_task_regs for other tasks, so just check there.) The set function should also be made to work on current in case that entails some special cases, though this was never required before for ptrace. Adding this flexibility covers the arch needs to open the door to more sophisticated new debugging facilities that don't always need to context-switch to do every little thing. The copyin/copyout helper functions (in a later patch) relieve the arch code of most of the cumbersome details of the flexible get/set interfaces. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/regset.h | 206 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 include/linux/regset.h (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h new file mode 100644 index 000000000000..85d0fb0a014d --- /dev/null +++ b/include/linux/regset.h @@ -0,0 +1,206 @@ +/* + * User-mode machine state access + * + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * Red Hat Author: Roland McGrath. + */ + +#ifndef _LINUX_REGSET_H +#define _LINUX_REGSET_H 1 + +#include +#include +struct task_struct; +struct user_regset; + + +/** + * user_regset_active_fn - type of @active function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * + * Return -%ENODEV if not available on the hardware found. + * Return %0 if no interesting state in this thread. + * Return >%0 number of @size units of interesting state. + * Any get call fetching state beyond that number will + * see the default initialization state for this data, + * so a caller that knows what the default state is need + * not copy it all out. + * This call is optional; the pointer is %NULL if there + * is no inexpensive check to yield a value < @n. + */ +typedef int user_regset_active_fn(struct task_struct *target, + const struct user_regset *regset); + +/** + * user_regset_get_fn - type of @get function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @pos: offset into the regset data to access, in bytes + * @count: amount of data to copy, in bytes + * @kbuf: if not %NULL, a kernel-space pointer to copy into + * @ubuf: if @kbuf is %NULL, a user-space pointer to copy into + * + * Fetch register values. Return %0 on success; -%EIO or -%ENODEV + * are usual failure returns. The @pos and @count values are in + * bytes, but must be properly aligned. If @kbuf is non-null, that + * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then + * ubuf gives a userland pointer to access directly, and an -%EFAULT + * return value is possible. + */ +typedef int user_regset_get_fn(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); + +/** + * user_regset_set_fn - type of @set function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @pos: offset into the regset data to access, in bytes + * @count: amount of data to copy, in bytes + * @kbuf: if not %NULL, a kernel-space pointer to copy from + * @ubuf: if @kbuf is %NULL, a user-space pointer to copy from + * + * Store register values. Return %0 on success; -%EIO or -%ENODEV + * are usual failure returns. The @pos and @count values are in + * bytes, but must be properly aligned. If @kbuf is non-null, that + * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then + * ubuf gives a userland pointer to access directly, and an -%EFAULT + * return value is possible. + */ +typedef int user_regset_set_fn(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +/** + * user_regset_writeback_fn - type of @writeback function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * @immediate: zero if writeback at completion of next context switch is OK + * + * This call is optional; usually the pointer is %NULL. When + * provided, there is some user memory associated with this regset's + * hardware, such as memory backing cached register data on register + * window machines; the regset's data controls what user memory is + * used (e.g. via the stack pointer value). + * + * Write register data back to user memory. If the @immediate flag + * is nonzero, it must be written to the user memory so uaccess or + * access_process_vm() can see it when this call returns; if zero, + * then it must be written back by the time the task completes a + * context switch (as synchronized with wait_task_inactive()). + * Return %0 on success or if there was nothing to do, -%EFAULT for + * a memory problem (bad stack pointer or whatever), or -%EIO for a + * hardware problem. + */ +typedef int user_regset_writeback_fn(struct task_struct *target, + const struct user_regset *regset, + int immediate); + +/** + * struct user_regset - accessible thread CPU state + * @n: Number of slots (registers). + * @size: Size in bytes of a slot (register). + * @align: Required alignment, in bytes. + * @bias: Bias from natural indexing. + * @core_note_type: ELF note @n_type value used in core dumps. + * @get: Function to fetch values. + * @set: Function to store values. + * @active: Function to report if regset is active, or %NULL. + * @writeback: Function to write data back to user memory, or %NULL. + * + * This data structure describes a machine resource we call a register set. + * This is part of the state of an individual thread, not necessarily + * actual CPU registers per se. A register set consists of a number of + * similar slots, given by @n. Each slot is @size bytes, and aligned to + * @align bytes (which is at least @size). + * + * These functions must be called only on the current thread or on a + * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are + * guaranteed will not be woken up and return to user mode, and that we + * have called wait_task_inactive() on. (The target thread always might + * wake up for SIGKILL while these functions are working, in which case + * that thread's user_regset state might be scrambled.) + * + * The @pos argument must be aligned according to @align; the @count + * argument must be a multiple of @size. These functions are not + * responsible for checking for invalid arguments. + * + * When there is a natural value to use as an index, @bias gives the + * difference between the natural index and the slot index for the + * register set. For example, x86 GDT segment descriptors form a regset; + * the segment selector produces a natural index, but only a subset of + * that index space is available as a regset (the TLS slots); subtracting + * @bias from a segment selector index value computes the regset slot. + * + * If nonzero, @core_note_type gives the n_type field (NT_* value) + * of the core file note in which this regset's data appears. + * NT_PRSTATUS is a special case in that the regset data starts at + * offsetof(struct elf_prstatus, pr_reg) into the note data; that is + * part of the per-machine ELF formats userland knows about. In + * other cases, the core file note contains exactly the whole regset + * (@n * @size) and nothing else. The core file note is normally + * omitted when there is an @active function and it returns zero. + */ +struct user_regset { + user_regset_get_fn *get; + user_regset_set_fn *set; + user_regset_active_fn *active; + user_regset_writeback_fn *writeback; + unsigned int n; + unsigned int size; + unsigned int align; + unsigned int bias; + unsigned int core_note_type; +}; + +/** + * struct user_regset_view - available regsets + * @name: Identifier, e.g. UTS_MACHINE string. + * @regsets: Array of @n regsets available in this view. + * @n: Number of elements in @regsets. + * @e_machine: ELF header @e_machine %EM_* value written in core dumps. + * @e_flags: ELF header @e_flags value written in core dumps. + * @ei_osabi: ELF header @e_ident[%EI_OSABI] value written in core dumps. + * + * A regset view is a collection of regsets (&struct user_regset, + * above). This describes all the state of a thread that can be seen + * from a given architecture/ABI environment. More than one view might + * refer to the same &struct user_regset, or more than one regset + * might refer to the same machine-specific state in the thread. For + * example, a 32-bit thread's state could be examined from the 32-bit + * view or from the 64-bit view. Either method reaches the same thread + * register state, doing appropriate widening or truncation. + */ +struct user_regset_view { + const char *name; + const struct user_regset *regsets; + unsigned int n; + u32 e_flags; + u16 e_machine; + u8 ei_osabi; +}; + +/* + * This is documented here rather than at the definition sites because its + * implementation is machine-dependent but its interface is universal. + */ +/** + * task_user_regset_view - Return the process's native regset view. + * @tsk: a thread of the process in question + * + * Return the &struct user_regset_view that is native for the given process. + * For example, what it would access when it called ptrace(). + * Throughout the life of the process, this only changes at exec. + */ +const struct user_regset_view *task_user_regset_view(struct task_struct *tsk); + + +#endif /* */ -- cgit v1.3-8-gc7d7 From bae3f7c39dee5951bcbedeaedb6744f882a00173 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:45 +0100 Subject: x86: user_regset helpers This adds some inlines to linux/regset.h intended for arch code to use in its user_regset get and set functions. These make it pretty easy to deal with the interface's optional kernel-space or user-space pointers and its generalized access to a part of the register data at a time. In simple cases where the internal data structure matches the exported layout (core dump format), a get function can be nothing but a call to user_regset_copyout, and a set function a call to user_regset_copyin. In other cases the exported layout is usually made up of a few pieces each stored contiguously in a different internal data structure. These helpers make it straightforward to write a get or set function by processing each contiguous chunk of the data in order. The start_pos and end_pos arguments are always constants, so these inlines collapse to a small amount of code. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/regset.h | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index 85d0fb0a014d..761c931af975 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -15,6 +15,7 @@ #include #include +#include struct task_struct; struct user_regset; @@ -203,4 +204,119 @@ struct user_regset_view { const struct user_regset_view *task_user_regset_view(struct task_struct *tsk); +/* + * These are helpers for writing regset get/set functions in arch code. + * Because @start_pos and @end_pos are always compile-time constants, + * these are inlined into very little code though they look large. + * + * Use one or more calls sequentially for each chunk of regset data stored + * contiguously in memory. Call with constants for @start_pos and @end_pos, + * giving the range of byte positions in the regset that data corresponds + * to; @end_pos can be -1 if this chunk is at the end of the regset layout. + * Each call updates the arguments to point past its chunk. + */ + +static inline int user_regset_copyout(unsigned int *pos, unsigned int *count, + void **kbuf, + void __user **ubuf, const void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + data += *pos - start_pos; + if (*kbuf) { + memcpy(*kbuf, data, copy); + *kbuf += copy; + } else if (__copy_to_user(*ubuf, data, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +static inline int user_regset_copyin(unsigned int *pos, unsigned int *count, + const void **kbuf, + const void __user **ubuf, void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + data += *pos - start_pos; + if (*kbuf) { + memcpy(data, *kbuf, copy); + *kbuf += copy; + } else if (__copy_from_user(data, *ubuf, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +/* + * These two parallel the two above, but for portions of a regset layout + * that always read as all-zero or for which writes are ignored. + */ +static inline int user_regset_copyout_zero(unsigned int *pos, + unsigned int *count, + void **kbuf, void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + if (*kbuf) { + memset(*kbuf, 0, copy); + *kbuf += copy; + } else if (__clear_user(*ubuf, copy)) + return -EFAULT; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + +static inline int user_regset_copyin_ignore(unsigned int *pos, + unsigned int *count, + const void **kbuf, + const void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) + return 0; + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = (end_pos < 0 ? *count + : min(*count, end_pos - *pos)); + if (*kbuf) + *kbuf += copy; + else + *ubuf += copy; + *pos += copy; + *count -= copy; + } + return 0; +} + + #endif /* */ -- cgit v1.3-8-gc7d7 From 5bde4d181793be84351bc21c256d8c71cfcd313a Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:47 +0100 Subject: x86: user_regset user-copy helpers This defines two new inlines in linux/regset.h, for use in arch_ptrace implementations and the like. These provide simplified wrappers for using the user_regset interfaces to copy thread regset data into the caller's user-space memory. The inlines are trivial, but make the common uses in places such as ptrace implementation much more concise, easier to read, and less prone to code-copying errors. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/regset.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index 761c931af975..8abee6556223 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -318,5 +318,51 @@ static inline int user_regset_copyin_ignore(unsigned int *pos, return 0; } +/** + * copy_regset_to_user - fetch a thread's user_regset data into user memory + * @target: thread to be examined + * @view: &struct user_regset_view describing user thread machine state + * @setno: index in @view->regsets + * @offset: offset into the regset data, in bytes + * @size: amount of data to copy, in bytes + * @data: user-mode pointer to copy into + */ +static inline int copy_regset_to_user(struct task_struct *target, + const struct user_regset_view *view, + unsigned int setno, + unsigned int offset, unsigned int size, + void __user *data) +{ + const struct user_regset *regset = &view->regsets[setno]; + + if (!access_ok(VERIFY_WRITE, data, size)) + return -EIO; + + return regset->get(target, regset, offset, size, NULL, data); +} + +/** + * copy_regset_from_user - store into thread's user_regset data from user memory + * @target: thread to be examined + * @view: &struct user_regset_view describing user thread machine state + * @setno: index in @view->regsets + * @offset: offset into the regset data, in bytes + * @size: amount of data to copy, in bytes + * @data: user-mode pointer to copy from + */ +static inline int copy_regset_from_user(struct task_struct *target, + const struct user_regset_view *view, + unsigned int setno, + unsigned int offset, unsigned int size, + const void __user *data) +{ + const struct user_regset *regset = &view->regsets[setno]; + + if (!access_ok(VERIFY_READ, data, size)) + return -EIO; + + return regset->set(target, regset, offset, size, NULL, data); +} + #endif /* */ -- cgit v1.3-8-gc7d7 From 032d82d9065dec0e26718eca376c2029e4bd0595 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:47 +0100 Subject: x86: compat_ptrace_request This adds a compat_ptrace_request that is the analogue of ptrace_request for the things that 32-on-64 ptrace implementations can share in common. So far there are just a couple of requests handled generically. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/compat.h | 4 ++++ kernel/ptrace.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index ba29d4c59643..a907fbede6c3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -243,6 +243,10 @@ asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, const compat_ulong_t __user *new_nodes); +extern int compat_ptrace_request(struct task_struct *child, + compat_long_t request, + compat_ulong_t addr, compat_ulong_t data); + /* * epoll (fs/eventpoll.c) compat bits follow ... */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e6a99d2793b3..ed1c3d56c2cd 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -607,3 +607,41 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) copied = access_process_vm(tsk, addr, &data, sizeof(data), 1); return (copied == sizeof(data)) ? 0 : -EIO; } + +#ifdef CONFIG_COMPAT +#include + +int compat_ptrace_request(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data) +{ + compat_ulong_t __user *datap = compat_ptr(data); + compat_ulong_t word; + int ret; + + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + ret = access_process_vm(child, addr, &word, sizeof(word), 0); + if (ret != sizeof(word)) + ret = -EIO; + else + ret = put_user(word, datap); + break; + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + ret = access_process_vm(child, addr, &data, sizeof(data), 1); + ret = (ret != sizeof(data) ? -EIO : 0); + break; + + case PTRACE_GETEVENTMSG: + ret = put_user((compat_ulong_t) child->ptrace_message, datap); + break; + + default: + ret = ptrace_request(child, request, addr, data); + } + + return ret; +} +#endif /* CONFIG_COMPAT */ -- cgit v1.3-8-gc7d7 From c269f19617f508cc5c29c0b064c1a437d7011a46 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:48 +0100 Subject: x86: compat_sys_ptrace This adds a generic definition of compat_sys_ptrace that calls compat_arch_ptrace, parallel to sys_ptrace/arch_ptrace. Some machines needing this already define a function by that name. The new generic function is defined only on machines that put #define __ARCH_WANT_COMPAT_SYS_PTRACE into asm/ptrace.h. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/compat.h | 7 +++++++ kernel/ptrace.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index a907fbede6c3..d38655f2be70 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -247,6 +247,13 @@ extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); +#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE +extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data); +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_long_t addr, compat_long_t data); +#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ + /* * epoll (fs/eventpoll.c) compat bits follow ... */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ed1c3d56c2cd..e6e9b8be4b05 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -644,4 +644,50 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, return ret; } + +#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_long_t addr, compat_long_t data) +{ + struct task_struct *child; + long ret; + + /* + * This lock_kernel fixes a subtle race with suid exec + */ + lock_kernel(); + if (request == PTRACE_TRACEME) { + ret = ptrace_traceme(); + goto out; + } + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + /* + * Some architectures need to do book-keeping after + * a ptrace attach. + */ + if (!ret) + arch_ptrace_attach(child); + goto out_put_task_struct; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (!ret) + ret = compat_arch_ptrace(child, request, addr, data); + + out_put_task_struct: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} +#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ + #endif /* CONFIG_COMPAT */ -- cgit v1.3-8-gc7d7 From bb61682b3f31dec7d058cae2f6edd2275248a704 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:31:56 +0100 Subject: x86: x86 core dump TLS This makes ELF core dumps of 32-bit processes include a new note type NT_386_TLS (0x200) giving the contents of the TLS slots in struct user_desc format. This lets post mortem examination figure out what the segment registers mean like the debugger does with get_thread_area on a live process. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ptrace.c | 1 + include/linux/elf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f8b89059e6ed..e6a680c7daf7 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1313,6 +1313,7 @@ static const struct user_regset x86_32_regsets[] = { .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, [REGSET_TLS] = { + .core_note_type = NT_386_TLS, .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, .size = sizeof(struct user_desc), .align = sizeof(struct user_desc), diff --git a/include/linux/elf.h b/include/linux/elf.h index 576e83bd6d88..7ceb24d87c1a 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -355,6 +355,7 @@ typedef struct elf64_shdr { #define NT_AUXV 6 #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ #define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ /* Note header in a PT_NOTE section */ -- cgit v1.3-8-gc7d7 From 74ef649fe847fdfbd3e1732d21b923f59ca04e8c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 30 Jan 2008 13:32:42 +0100 Subject: x86: add _AT() macro to conditionally cast # HG changeset patch # User Jeremy Fitzhardinge # Date 1199317452 28800 # Node ID f7e7db3facd9406545103164f9be8f9ba1a2b549 # Parent 4d9a413a0f4c1d98dbea704f0366457b5117045d x86: add _AT() macro to conditionally cast Define _AT(type, value) to conditionally cast a value when compiling C code, but not when used in assembler. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/const.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/const.h b/include/linux/const.h index 07b300bfe34b..c22c707c455d 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -7,13 +7,18 @@ * C code. Therefore we cannot annotate them always with * 'UL' and other type specifiers unilaterally. We * use the following macros to deal with this. + * + * Similarly, _AT() will cast an expression with a type in C, but + * leave it unchanged in asm. */ #ifdef __ASSEMBLY__ #define _AC(X,Y) X +#define _AT(T,X) X #else #define __AC(X,Y) (X##Y) #define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) #endif #endif /* !(_LINUX_CONST_H) */ -- cgit v1.3-8-gc7d7 From 5280e004fc22314122c84978c0b6a741cf96dc0f Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 30 Jan 2008 13:32:52 +0100 Subject: percpu: move arch XX_PER_CPU_XX definitions into linux/percpu.h - Special consideration for IA64: Add the ability to specify arch specific per cpu flags - remove .data.percpu attribute from DEFINE_PER_CPU for non-smp case. The arch definitions are all the same. So move them into linux/percpu.h. We cannot move DECLARE_PER_CPU since some include files just include asm/percpu.h to avoid include recursion problems. Cc: Rusty Russell Cc: Andi Kleen Signed-off-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/percpu.h | 18 ------------------ include/asm-ia64/percpu.h | 24 ++---------------------- include/asm-powerpc/percpu.h | 17 ----------------- include/asm-s390/percpu.h | 18 ------------------ include/asm-sparc64/percpu.h | 16 ---------------- include/asm-x86/percpu_32.h | 12 ------------ include/asm-x86/percpu_64.h | 17 ----------------- include/linux/percpu.h | 24 ++++++++++++++++++++++++ 8 files changed, 26 insertions(+), 120 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b5e53b9ab1f7..e038f13594e5 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -9,15 +9,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define per_cpu_offset(x) (__per_cpu_offset[x]) -/* Separate out the type, so (int[3], foo) works. */ -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp - /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*({ \ extern int simple_identifier_##var(void); \ @@ -35,12 +26,6 @@ do { \ } while (0) #else /* ! SMP */ -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) - #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #define __raw_get_cpu_var(var) per_cpu__##var @@ -49,7 +34,4 @@ do { \ #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h index c4f1e328a5ba..0095bcf79848 100644 --- a/include/asm-ia64/percpu.h +++ b/include/asm-ia64/percpu.h @@ -16,28 +16,11 @@ #include #ifdef HAVE_MODEL_SMALL_ATTRIBUTE -# define __SMALL_ADDR_AREA __attribute__((__model__ (__small__))) -#else -# define __SMALL_ADDR_AREA +# define PER_CPU_ATTRIBUTES __attribute__((__model__ (__small__))) #endif #define DECLARE_PER_CPU(type, name) \ - extern __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name - -/* Separate out the type, so (int[3], foo) works. */ -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) \ - __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name - -#ifdef CONFIG_SMP -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp -#else -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) -#endif + extern PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name /* * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an @@ -68,9 +51,6 @@ extern void *per_cpu_init(void); #endif /* SMP */ -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - /* * Be extremely careful when taking the address of this variable! Due to virtual * remapping, it is different from the canonical address returned by __get_cpu_var(var)! diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h index 6b229626d3ff..cc1cbf656b02 100644 --- a/include/asm-powerpc/percpu.h +++ b/include/asm-powerpc/percpu.h @@ -16,15 +16,6 @@ #define __my_cpu_offset() get_paca()->data_offset #define per_cpu_offset(x) (__per_cpu_offset(x)) -/* Separate out the type, so (int[3], foo) works. */ -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp - /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) @@ -43,11 +34,6 @@ extern void setup_per_cpu_areas(void); #else /* ! SMP */ -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) - #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #define __raw_get_cpu_var(var) per_cpu__##var @@ -56,9 +42,6 @@ extern void setup_per_cpu_areas(void); #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - #else #include #endif diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index f94d0d3cdb2f..2d676a873858 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -34,16 +34,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; -/* Separate out the type, so (int[3], foo) works. */ -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) \ - __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp - #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) @@ -60,11 +50,6 @@ do { \ #else /* ! SMP */ -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) - #define __get_cpu_var(var) __reloc_hide(var,0) #define __raw_get_cpu_var(var) __reloc_hide(var,0) #define per_cpu(var,cpu) __reloc_hide(var,0) @@ -73,7 +58,4 @@ do { \ #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - #endif /* __ARCH_S390_PERCPU__ */ diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h index a1f53a4da405..c7e52decba98 100644 --- a/include/asm-sparc64/percpu.h +++ b/include/asm-sparc64/percpu.h @@ -16,15 +16,6 @@ extern unsigned long __per_cpu_shift; (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) #define per_cpu_offset(x) (__per_cpu_offset(x)) -/* Separate out the type, so (int[3], foo) works. */ -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp - /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset)) @@ -41,10 +32,6 @@ do { \ #else /* ! SMP */ #define real_setup_per_cpu_areas() do { } while (0) -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) #define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var @@ -54,7 +41,4 @@ do { \ #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - #endif /* __ARCH_SPARC64_PERCPU__ */ diff --git a/include/asm-x86/percpu_32.h b/include/asm-x86/percpu_32.h index 3949586bf94e..77bd0045f331 100644 --- a/include/asm-x86/percpu_32.h +++ b/include/asm-x86/percpu_32.h @@ -47,16 +47,7 @@ extern unsigned long __per_cpu_offset[]; #define per_cpu_offset(x) (__per_cpu_offset[x]) -/* Separate out the type, so (int[3], foo) works. */ #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp - /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); @@ -81,9 +72,6 @@ do { \ (src), (size)); \ } while (0) -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ #define __percpu_seg "%%fs:" #else /* !SMP */ diff --git a/include/asm-x86/percpu_64.h b/include/asm-x86/percpu_64.h index 5abd48270101..24fe7075248d 100644 --- a/include/asm-x86/percpu_64.h +++ b/include/asm-x86/percpu_64.h @@ -16,15 +16,6 @@ #define per_cpu_offset(x) (__per_cpu_offset(x)) -/* Separate out the type, so (int[3], foo) works. */ -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.shared_aligned"))) \ - __typeof__(type) per_cpu__##name \ - ____cacheline_internodealigned_in_smp - /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) (*({ \ extern int simple_identifier_##var(void); \ @@ -49,11 +40,6 @@ extern void setup_per_cpu_areas(void); #else /* ! SMP */ -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) - #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #define __raw_get_cpu_var(var) per_cpu__##var @@ -62,7 +48,4 @@ extern void setup_per_cpu_areas(void); #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - #endif /* _ASM_X8664_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 926adaae0f96..00412bb494c4 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,6 +9,30 @@ #include +#ifndef PER_CPU_ATTRIBUTES +#define PER_CPU_ATTRIBUTES +#endif + +#ifdef CONFIG_SMP +#define DEFINE_PER_CPU(type, name) \ + __attribute__((__section__(".data.percpu"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.shared_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ + ____cacheline_aligned_in_smp +#else +#define DEFINE_PER_CPU(type, name) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) +#endif + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) + /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ #ifndef PERCPU_ENOUGH_ROOM #ifdef CONFIG_MODULES -- cgit v1.3-8-gc7d7 From 8c1c9356429741a82ff176d0f3400fb9e06b2a30 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Wed, 30 Jan 2008 13:32:53 +0100 Subject: x86: kprobes: add kprobes smoke tests that run on boot Here is a quick and naive smoke test for kprobes. This is intended to just verify if some unrelated change broke the *probes subsystem. It is self contained, architecture agnostic and isn't of any great use by itself. This needs to be built in the kernel and runs a basic set of tests to verify if kprobes, jprobes and kretprobes run fine on the kernel. In case of an error, it'll print out a message with a "BUG" prefix. This is a start; we intend to add more tests to this bucket over time. Thanks to Jim Keniston and Masami Hiramatsu for comments and suggestions. Tested on x86 (32/64) and powerpc. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 10 +++ kernel/Makefile | 1 + kernel/kprobes.c | 2 + kernel/test_kprobes.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 12 +++ 5 files changed, 241 insertions(+) create mode 100644 kernel/test_kprobes.c (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 81891581e89b..6168c0a44172 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -182,6 +182,15 @@ static inline void kretprobe_assert(struct kretprobe_instance *ri, } } +#ifdef CONFIG_KPROBES_SANITY_TEST +extern int init_test_probes(void); +#else +static inline int init_test_probes(void) +{ + return 0; +} +#endif /* CONFIG_KPROBES_SANITY_TEST */ + extern spinlock_t kretprobe_lock; extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); @@ -227,6 +236,7 @@ void unregister_kretprobe(struct kretprobe *rp); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); + #else /* CONFIG_KPROBES */ #define __kprobes /**/ diff --git a/kernel/Makefile b/kernel/Makefile index 390d42146267..62015c3d8d91 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o +obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_AUDIT_TREE) += audit_tree.o diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e3a5d817ac9b..d0493eafea3e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -824,6 +824,8 @@ static int __init init_kprobes(void) if (!err) err = register_die_notifier(&kprobe_exceptions_nb); + if (!err) + init_test_probes(); return err; } diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c new file mode 100644 index 000000000000..88cdb109e13c --- /dev/null +++ b/kernel/test_kprobes.c @@ -0,0 +1,216 @@ +/* + * test_kprobes.c - simple sanity test for *probes + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + */ + +#include +#include +#include + +#define div_factor 3 + +static u32 rand1, preh_val, posth_val, jph_val; +static int errors, handler_errors, num_tests; + +static noinline u32 kprobe_target(u32 value) +{ + /* + * gcc ignores noinline on some architectures unless we stuff + * sufficient lard into the function. The get_kprobe() here is + * just for that. + * + * NOTE: We aren't concerned about the correctness of get_kprobe() + * here; hence, this call is neither under !preempt nor with the + * kprobe_mutex held. This is fine(tm) + */ + if (get_kprobe((void *)0xdeadbeef)) + printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n"); + + return (value / div_factor); +} + +static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + preh_val = (rand1 / div_factor); + return 0; +} + +static void kp_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + if (preh_val != (rand1 / div_factor)) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in post_handler\n"); + } + posth_val = preh_val + div_factor; +} + +static struct kprobe kp = { + .symbol_name = "kprobe_target", + .pre_handler = kp_pre_handler, + .post_handler = kp_post_handler +}; + +static int test_kprobe(void) +{ + int ret; + + ret = register_kprobe(&kp); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_kprobe returned %d\n", ret); + return ret; + } + + ret = kprobe_target(rand1); + unregister_kprobe(&kp); + + if (preh_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe pre_handler not called\n"); + handler_errors++; + } + + if (posth_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kprobe post_handler not called\n"); + handler_errors++; + } + + return 0; +} + +static u32 j_kprobe_target(u32 value) +{ + if (value != rand1) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in jprobe handler\n"); + } + + jph_val = rand1; + jprobe_return(); + return 0; +} + +static struct jprobe jp = { + .entry = j_kprobe_target, + .kp.symbol_name = "kprobe_target" +}; + +static int test_jprobe(void) +{ + int ret; + + ret = register_jprobe(&jp); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_jprobe returned %d\n", ret); + return ret; + } + + ret = kprobe_target(rand1); + unregister_jprobe(&jp); + if (jph_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "jprobe handler not called\n"); + handler_errors++; + } + + return 0; +} + +#ifdef CONFIG_KRETPROBES +static u32 krph_val; + +static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long ret = regs_return_value(regs); + + if (ret != (rand1 / div_factor)) { + handler_errors++; + printk(KERN_ERR "Kprobe smoke test failed: " + "incorrect value in kretprobe handler\n"); + } + + krph_val = (rand1 / div_factor); + return 0; +} + +static struct kretprobe rp = { + .handler = return_handler, + .kp.symbol_name = "kprobe_target" +}; + +static int test_kretprobe(void) +{ + int ret; + + ret = register_kretprobe(&rp); + if (ret < 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "register_kretprobe returned %d\n", ret); + return ret; + } + + ret = kprobe_target(rand1); + unregister_kretprobe(&rp); + if (krph_val == 0) { + printk(KERN_ERR "Kprobe smoke test failed: " + "kretprobe handler not called\n"); + handler_errors++; + } + + return 0; +} +#endif /* CONFIG_KRETPROBES */ + +int init_test_probes(void) +{ + int ret; + + do { + rand1 = random32(); + } while (rand1 <= div_factor); + + printk(KERN_INFO "Kprobe smoke test started\n"); + num_tests++; + ret = test_kprobe(); + if (ret < 0) + errors++; + + num_tests++; + ret = test_jprobe(); + if (ret < 0) + errors++; + +#ifdef CONFIG_KRETPROBES + num_tests++; + ret = test_kretprobe(); + if (ret < 0) + errors++; +#endif /* CONFIG_KRETPROBES */ + + if (errors) + printk(KERN_ERR "BUG: Kprobe smoke test: %d out of " + "%d tests failed\n", errors, num_tests); + else if (handler_errors) + printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) " + "running handlers\n", handler_errors); + else + printk(KERN_INFO "Kprobe smoke test passed successfully\n"); + + return 0; +} diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c4ecb2994ba3..f535b9b5eb00 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -494,6 +494,18 @@ config RCU_TORTURE_TEST Say M if you want the RCU torture tests to build as a module. Say N if you are unsure. +config KPROBES_SANITY_TEST + bool "Kprobes sanity tests" + depends on DEBUG_KERNEL + depends on KPROBES + default n + help + This option provides for testing basic kprobes functionality on + boot. A sample kprobe, jprobe and kretprobe are inserted and + verified for functionality. + + Say N if you are unsure. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL -- cgit v1.3-8-gc7d7 From d50efc6c40620b2e11648cac64ebf4a824e40382 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:33:00 +0100 Subject: x86: fix UML and -regparm=3 introduce the "asmregparm" calling convention: for functions implemented in assembly with a fixed regparm input parameters calling convention. mark the semaphore and rwsem slowpath functions with that. Signed-off-by: Ingo Molnar Signed-off-by: Miklos Szeredi Signed-off-by: Thomas Gleixner --- include/asm-x86/linkage.h | 5 +++++ include/asm-x86/rwsem.h | 12 ++++++++---- include/asm-x86/semaphore_32.h | 8 ++++---- include/linux/linkage.h | 4 ++++ lib/rwsem.c | 8 ++++---- 5 files changed, 25 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h index 5a4c95905420..31739c7d66a9 100644 --- a/include/asm-x86/linkage.h +++ b/include/asm-x86/linkage.h @@ -9,6 +9,11 @@ #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #define prevent_tail_call(ret) __asm__ ("" : "=r" (ret) : "0" (ret)) +/* + * For 32-bit UML - mark functions implemented in assembly that use + * regparm input parameters: + */ +#define asmregparm __attribute__((regparm(3))) #endif #ifdef CONFIG_X86_ALIGNMENT_16 diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h index a7e7e14cb430..520a379f4b80 100644 --- a/include/asm-x86/rwsem.h +++ b/include/asm-x86/rwsem.h @@ -44,10 +44,14 @@ struct rwsem_waiter; -extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem)); -extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem)); -extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *)); -extern struct rw_semaphore *FASTCALL(rwsem_downgrade_wake(struct rw_semaphore *sem)); +extern asmregparm struct rw_semaphore * + rwsem_down_read_failed(struct rw_semaphore *sem); +extern asmregparm struct rw_semaphore * + rwsem_down_write_failed(struct rw_semaphore *sem); +extern asmregparm struct rw_semaphore * + rwsem_wake(struct rw_semaphore *); +extern asmregparm struct rw_semaphore * + rwsem_downgrade_wake(struct rw_semaphore *sem); /* * the semaphore definition diff --git a/include/asm-x86/semaphore_32.h b/include/asm-x86/semaphore_32.h index cc826e85323f..ac96d3804d0c 100644 --- a/include/asm-x86/semaphore_32.h +++ b/include/asm-x86/semaphore_32.h @@ -83,10 +83,10 @@ static inline void init_MUTEX_LOCKED (struct semaphore *sem) sema_init(sem, 0); } -void __down_failed(void /* special register calling convention */); -int __down_failed_interruptible(void /* params in registers */); -int __down_failed_trylock(void /* params in registers */); -void __up_wakeup(void /* special register calling convention */); +extern asmregparm void __down_failed(atomic_t *count_ptr); +extern asmregparm int __down_failed_interruptible(atomic_t *count_ptr); +extern asmregparm int __down_failed_trylock(atomic_t *count_ptr); +extern asmregparm void __up_wakeup(atomic_t *count_ptr); /* * This is ugly, but we want the default case to fall through. diff --git a/include/linux/linkage.h b/include/linux/linkage.h index ff203dd02919..ceeeb5ea5b15 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -13,6 +13,10 @@ #define asmlinkage CPP_ASMLINKAGE #endif +#ifndef asmregparm +# define asmregparm +#endif + #ifndef prevent_tail_call # define prevent_tail_call(ret) do { } while (0) #endif diff --git a/lib/rwsem.c b/lib/rwsem.c index 7d02700a4b0e..3e3365e5665e 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -187,7 +187,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* * wait for the read lock to be granted */ -struct rw_semaphore fastcall __sched * +asmregparm struct rw_semaphore __sched * rwsem_down_read_failed(struct rw_semaphore *sem) { struct rwsem_waiter waiter; @@ -201,7 +201,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem) /* * wait for the write lock to be granted */ -struct rw_semaphore fastcall __sched * +asmregparm struct rw_semaphore __sched * rwsem_down_write_failed(struct rw_semaphore *sem) { struct rwsem_waiter waiter; @@ -216,7 +216,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem) * handle waking up a waiter on the semaphore * - up_read/up_write has decremented the active part of count if we come here */ -struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem) +asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; @@ -236,7 +236,7 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem) * - caller incremented waiting part of count and discovered it still negative * - just wake up any readers at the front of the queue */ -struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem) +asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; -- cgit v1.3-8-gc7d7 From 076f9776f5d8d131b36955db8641aba3893c2c1b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:33:06 +0100 Subject: x86: make early printk selectable on 64-bit as well Enable CONFIG_EMBEDDED to select CONFIG_EARLY_PRINTK on 64-bit as well. saves ~2K: text data bss dec hex filename 7290283 3672091 1907848 12870222 c4624e vmlinux.before 7288373 3671795 1907848 12868016 c459b0 vmlinux.after Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 2 +- arch/x86/kernel/head64.c | 7 ++++++- arch/x86/kernel/head_64.S | 7 +++++++ include/asm-x86/kdebug.h | 1 - include/linux/kernel.h | 3 +++ kernel/printk.c | 7 +++++++ 6 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 15854b53badc..88420af98140 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -6,7 +6,7 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" config EARLY_PRINTK - bool "Early printk" if EMBEDDED && DEBUG_KERNEL && X86_32 + bool "Early printk" if EMBEDDED default y help Write kernel log output directly into the VGA buffer or to a serial diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 85c1e6bf8022..87e031d4abf1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -58,8 +58,13 @@ void __init x86_64_start_kernel(char * real_mode_data) /* Make NULL pointers segfault */ zap_identity_mappings(); - for (i = 0; i < IDT_ENTRIES; i++) + for (i = 0; i < IDT_ENTRIES; i++) { +#ifdef CONFIG_EARLY_PRINTK set_intr_gate(i, &early_idt_handlers[i]); +#else + set_intr_gate(i, early_idt_handler); +#endif + } load_idt((const struct desc_ptr *)&idt_descr); early_printk("Kernel alive\n"); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 8b4c35cb519a..1d5a7a361200 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -267,6 +267,7 @@ init_rsp: bad_address: jmp bad_address +#ifdef CONFIG_EARLY_PRINTK .macro early_idt_tramp first, last .ifgt \last-\first early_idt_tramp \first, \last-1 @@ -281,8 +282,10 @@ early_idt_handlers: early_idt_tramp 64, 127 early_idt_tramp 128, 191 early_idt_tramp 192, 255 +#endif ENTRY(early_idt_handler) +#ifdef CONFIG_EARLY_PRINTK cmpl $2,early_recursion_flag(%rip) jz 1f incl early_recursion_flag(%rip) @@ -311,8 +314,11 @@ ENTRY(early_idt_handler) movq 8(%rsp),%rsi # get rip again call __print_symbol #endif +#endif /* EARLY_PRINTK */ 1: hlt jmp 1b + +#ifdef CONFIG_EARLY_PRINTK early_recursion_flag: .long 0 @@ -320,6 +326,7 @@ early_idt_msg: .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" early_idt_ripmsg: .asciz "RIP %s\n" +#endif /* CONFIG_EARLY_PRINTK */ .balign PAGE_SIZE diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index a5e5e3b7eb23..e9f42d1ac38f 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -22,7 +22,6 @@ enum die_val { DIE_PAGE_FAULT, }; -extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); extern void printk_address(unsigned long address); extern void die(const char *,struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a7283c9beadf..ff356b2ee478 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -194,6 +194,9 @@ static inline int log_buf_read(int idx) { return 0; } static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } #endif +extern void __attribute__((format(printf, 1, 2))) + early_printk(const char *fmt, ...); + unsigned long int_sqrt(unsigned long); extern int printk_ratelimit(void); diff --git a/kernel/printk.c b/kernel/printk.c index 3b7c968d0ef9..58bbec684119 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -36,6 +36,13 @@ #include +/* + * Architectures can override it: + */ +void __attribute__((weak)) early_printk(const char *fmt, ...) +{ +} + #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) /* printk's without a loglevel use this.. */ -- cgit v1.3-8-gc7d7 From 6b8be6df7f971919622d152d144c8798ad7fd160 Mon Sep 17 00:00:00 2001 From: John Reiser Date: Wed, 30 Jan 2008 13:33:13 +0100 Subject: x86: add ENDPROC() markers The ENDPROCs() were not used everywhere. Some code used just END() instead, while other code used nothing. um/sys-i386/checksum.S didn't #include . I also got confused because gcc puts the .type near the ENTRY, while ENDPROC puts it on the opposite end. Signed off by: John Reiser Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/lib/semaphore_32.S | 20 ++++++++++---------- include/linux/linkage.h | 4 ++++ 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index 444fba400983..e2c6e0d5e125 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S @@ -49,7 +49,7 @@ ENTRY(__down_failed) ENDFRAME ret CFI_ENDPROC - END(__down_failed) + ENDPROC(__down_failed) ENTRY(__down_failed_interruptible) CFI_STARTPROC @@ -70,7 +70,7 @@ ENTRY(__down_failed_interruptible) ENDFRAME ret CFI_ENDPROC - END(__down_failed_interruptible) + ENDPROC(__down_failed_interruptible) ENTRY(__down_failed_trylock) CFI_STARTPROC @@ -91,7 +91,7 @@ ENTRY(__down_failed_trylock) ENDFRAME ret CFI_ENDPROC - END(__down_failed_trylock) + ENDPROC(__down_failed_trylock) ENTRY(__up_wakeup) CFI_STARTPROC @@ -112,7 +112,7 @@ ENTRY(__up_wakeup) ENDFRAME ret CFI_ENDPROC - END(__up_wakeup) + ENDPROC(__up_wakeup) /* * rw spinlock fallbacks @@ -132,7 +132,7 @@ ENTRY(__write_lock_failed) ENDFRAME ret CFI_ENDPROC - END(__write_lock_failed) + ENDPROC(__write_lock_failed) ENTRY(__read_lock_failed) CFI_STARTPROC @@ -148,7 +148,7 @@ ENTRY(__read_lock_failed) ENDFRAME ret CFI_ENDPROC - END(__read_lock_failed) + ENDPROC(__read_lock_failed) #endif @@ -170,7 +170,7 @@ ENTRY(call_rwsem_down_read_failed) CFI_ADJUST_CFA_OFFSET -4 ret CFI_ENDPROC - END(call_rwsem_down_read_failed) + ENDPROC(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_write_failed) CFI_STARTPROC @@ -182,7 +182,7 @@ ENTRY(call_rwsem_down_write_failed) CFI_ADJUST_CFA_OFFSET -4 ret CFI_ENDPROC - END(call_rwsem_down_write_failed) + ENDPROC(call_rwsem_down_write_failed) ENTRY(call_rwsem_wake) CFI_STARTPROC @@ -196,7 +196,7 @@ ENTRY(call_rwsem_wake) CFI_ADJUST_CFA_OFFSET -4 1: ret CFI_ENDPROC - END(call_rwsem_wake) + ENDPROC(call_rwsem_wake) /* Fix up special calling conventions */ ENTRY(call_rwsem_downgrade_wake) @@ -214,6 +214,6 @@ ENTRY(call_rwsem_downgrade_wake) CFI_ADJUST_CFA_OFFSET -4 ret CFI_ENDPROC - END(call_rwsem_downgrade_wake) + ENDPROC(call_rwsem_downgrade_wake) #endif diff --git a/include/linux/linkage.h b/include/linux/linkage.h index ceeeb5ea5b15..3faf599ea58e 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -57,6 +57,10 @@ .size name, .-name #endif +/* If symbol 'name' is treated as a subroutine (gets called, and returns) + * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of + * static analysis tools such as stack depth analyzer. + */ #ifndef ENDPROC #define ENDPROC(name) \ .type name, @function; \ -- cgit v1.3-8-gc7d7 From ca74a6f84e68b44867022f4a4f3ec17c087c864e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:33:17 +0100 Subject: x86: optimize lock prefix switching to run less frequently On VMs implemented using JITs that cache translated code changing the lock prefixes is a quite costly operation that forces the JIT to throw away and retranslate a lot of code. Previously a SMP kernel would rewrite the locks once for each CPU which is quite unnecessary. This patch changes the code to never switch at boot in the normal case (SMP kernel booting with >1 CPU) or only once for SMP kernel on UP. This makes a significant difference in boot up performance on AMD SimNow! Also I expect it to be a little faster on native systems too because a smp switch does a lot of text_poke()s which each synchronize the pipeline. v1->v2: Rename max_cpus v1->v2: Fix off by one in UP check (Thomas Gleixner) Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/alternative.c | 16 ++++++++++++++-- include/linux/smp.h | 2 ++ init/main.c | 16 ++++++++-------- 3 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cdc43242da92..318a4f9b7ece 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -273,6 +273,7 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_SPINLOCK(smp_alt); +static int smp_mode = 1; /* protected by smp_alt */ void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, @@ -354,7 +355,14 @@ void alternatives_smp_switch(int smp) BUG_ON(!smp && (num_online_cpus() > 1)); spin_lock_irqsave(&smp_alt, flags); - if (smp) { + + /* + * Avoid unnecessary switches because it forces JIT based VMs to + * throw away all cached translations, which can be quite costly. + */ + if (smp == smp_mode) { + /* nothing */ + } else if (smp) { printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); @@ -369,6 +377,7 @@ void alternatives_smp_switch(int smp) alternatives_smp_unlock(mod->locks, mod->locks_end, mod->text, mod->text_end); } + smp_mode = smp; spin_unlock_irqrestore(&smp_alt, flags); } @@ -441,7 +450,10 @@ void __init alternative_instructions(void) alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - alternatives_smp_switch(0); + + /* Only switch to UP mode if we don't immediately boot others */ + if (num_possible_cpus() == 1 || setup_max_cpus <= 1) + alternatives_smp_switch(0); } #endif apply_paravirt(__parainstructions, __parainstructions_end); diff --git a/include/linux/smp.h b/include/linux/smp.h index c25e66bcecf3..55232ccf9cfd 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -78,6 +78,8 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); */ void smp_prepare_boot_cpu(void); +extern unsigned int setup_max_cpus; + #else /* !SMP */ /* diff --git a/init/main.c b/init/main.c index 3f8aba291ed3..5843fe996703 100644 --- a/init/main.c +++ b/init/main.c @@ -128,7 +128,7 @@ static char *ramdisk_execute_command; #ifdef CONFIG_SMP /* Setup configured maximum number of CPUs to activate */ -static unsigned int __initdata max_cpus = NR_CPUS; +unsigned int __initdata setup_max_cpus = NR_CPUS; /* * Setup routine for controlling SMP activation @@ -146,7 +146,7 @@ static inline void disable_ioapic_setup(void) {}; static int __init nosmp(char *str) { - max_cpus = 0; + setup_max_cpus = 0; disable_ioapic_setup(); return 0; } @@ -155,8 +155,8 @@ early_param("nosmp", nosmp); static int __init maxcpus(char *str) { - get_option(&str, &max_cpus); - if (max_cpus == 0) + get_option(&str, &setup_max_cpus); + if (setup_max_cpus == 0) disable_ioapic_setup(); return 0; @@ -164,7 +164,7 @@ static int __init maxcpus(char *str) early_param("maxcpus", maxcpus); #else -#define max_cpus NR_CPUS +#define setup_max_cpus NR_CPUS #endif /* @@ -393,7 +393,7 @@ static void __init smp_init(void) /* FIXME: This should be done in userspace --RR */ for_each_present_cpu(cpu) { - if (num_online_cpus() >= max_cpus) + if (num_online_cpus() >= setup_max_cpus) break; if (!cpu_online(cpu)) cpu_up(cpu); @@ -401,7 +401,7 @@ static void __init smp_init(void) /* Any cleanup work */ printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); - smp_cpus_done(max_cpus); + smp_cpus_done(setup_max_cpus); } #endif @@ -824,7 +824,7 @@ static int __init kernel_init(void * unused) __set_special_pids(1, 1); cad_pid = task_pid(current); - smp_prepare_cpus(max_cpus); + smp_prepare_cpus(setup_max_cpus); do_pre_smp_initcalls(); -- cgit v1.3-8-gc7d7 From 03252919b79891063cf99145612360efbdf9500b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:33:18 +0100 Subject: x86: print which shared library/executable faulted in segfault etc. messages v3 They now look like: hal-resmgr[13791]: segfault at 3c rip 2b9c8caec182 rsp 7fff1e825d30 error 4 in libacl.so.1.1.0[2b9c8caea000+6000] This makes it easier to pinpoint bugs to specific libraries. And printing the offset into a mapping also always allows to find the correct fault point in a library even with randomized mappings. Previously there was no way to actually find the correct code address inside the randomized mapping. Relies on earlier patch to shorten the printk formats. They are often now longer than 80 characters, but I think that's worth it. [includes fix from Eric Dumazet to check d_path error value] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_32.c | 7 +++++-- arch/x86/kernel/signal_64.c | 7 +++++-- arch/x86/kernel/traps_32.c | 7 +++++-- arch/x86/kernel/traps_64.c | 14 ++++++++++---- arch/x86/mm/fault_32.c | 4 +++- arch/x86/mm/fault_64.c | 4 +++- include/linux/mm.h | 1 + mm/memory.c | 31 +++++++++++++++++++++++++++++++ 8 files changed, 63 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 74df55895c8c..89a690edf999 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -198,12 +198,15 @@ asmlinkage int sys_sigreturn(unsigned long __unused) return ax; badframe: - if (show_unhandled_signals && printk_ratelimit()) + if (show_unhandled_signals && printk_ratelimit()) { printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx" - " sp:%lx oeax:%lx\n", + " sp:%lx oeax:%lx", task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, current->comm, task_pid_nr(current), frame, regs->ip, regs->sp, regs->orig_ax); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } force_sig(SIGSEGV, current); return 0; diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 4eb751c60390..7347bb14e306 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -484,9 +484,12 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; - if (show_unhandled_signals && printk_ratelimit()) - printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx\n", + if (show_unhandled_signals && printk_ratelimit()) { + printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } force_sig(SIGSEGV, me); } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 6f3bb287c702..270cfd483160 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -609,11 +609,14 @@ void __kprobes do_general_protection(struct pt_regs * regs, current->thread.error_code = error_code; current->thread.trap_no = 13; if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && - printk_ratelimit()) + printk_ratelimit()) { printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx\n", + "%s[%d] general protection ip:%lx sp:%lx error:%lx", current->comm, task_pid_nr(current), regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } force_sig(SIGSEGV, current); return; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 814801f4eb9e..911ed28afff8 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -642,11 +642,14 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, tsk->thread.trap_no = trapnr; if (show_unhandled_signals && unhandled_signal(tsk, signr) && - printk_ratelimit()) + printk_ratelimit()) { printk(KERN_INFO - "%s[%d] trap %s ip:%lx sp:%lx error:%lx\n", + "%s[%d] trap %s ip:%lx sp:%lx error:%lx", tsk->comm, tsk->pid, str, regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } if (info) force_sig_info(signr, info, tsk); @@ -741,11 +744,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, tsk->thread.trap_no = 13; if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) + printk_ratelimit()) { printk(KERN_INFO - "%s[%d] general protection ip:%lx sp:%lx error:%lx\n", + "%s[%d] general protection ip:%lx sp:%lx error:%lx", tsk->comm, tsk->pid, regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } force_sig(SIGSEGV, tsk); return; diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index 13d295506a17..276863dc4bdd 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -514,11 +514,13 @@ bad_area_nosemaphore: #ifdef CONFIG_X86_32 "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", #else - "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n", + "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", #endif task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); } tsk->thread.cr2 = address; /* Kernel addresses are always protection faults */ diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index b606bdefbb72..9ef0306efe9e 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -552,11 +552,13 @@ bad_area_nosemaphore: #ifdef CONFIG_X86_32 "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx", #else - "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n", + "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx", #endif task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, regs->ip, regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); } tsk->thread.cr2 = address; diff --git a/include/linux/mm.h b/include/linux/mm.h index 1897ca223eca..3c22d971afa7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1146,6 +1146,7 @@ extern int randomize_va_space; #endif const char * arch_vma_name(struct vm_area_struct *vma); +void print_vma_addr(char *prefix, unsigned long rip); struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); diff --git a/mm/memory.c b/mm/memory.c index 673ebbf499c7..d902d0e25edc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2754,3 +2754,34 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in return buf - old_buf; } + +/* + * Print the name of a VMA. + */ +void print_vma_addr(char *prefix, unsigned long ip) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ip); + if (vma && vma->vm_file) { + struct file *f = vma->vm_file; + char *buf = (char *)__get_free_page(GFP_KERNEL); + if (buf) { + char *p, *s; + + p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + printk("%s%s[%lx+%lx]", prefix, p, + vma->vm_start, + vma->vm_end - vma->vm_start); + free_page((unsigned long)buf); + } + } + up_read(¤t->mm->mmap_sem); +} -- cgit v1.3-8-gc7d7 From 0acf8e3447b893ff921863c2a4258e210d584452 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 30 Jan 2008 13:33:36 +0100 Subject: pci: add PCI identifiers for the RDC devices This patch defines the PCI identifiers found in the RDC R-321x System-on-Chip. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/pci_ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c69531348363..41f6f28690f6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2085,6 +2085,13 @@ #define PCI_VENDOR_ID_BELKIN 0x1799 #define PCI_DEVICE_ID_BELKIN_F5D7010V7 0x701f +#define PCI_VENDOR_ID_RDC 0x17f3 +#define PCI_DEVICE_ID_RDC_R6020 0x6020 +#define PCI_DEVICE_ID_RDC_R6030 0x6030 +#define PCI_DEVICE_ID_RDC_R6040 0x6040 +#define PCI_DEVICE_ID_RDC_R6060 0x6060 +#define PCI_DEVICE_ID_RDC_R6061 0x6061 + #define PCI_VENDOR_ID_LENOVO 0x17aa #define PCI_VENDOR_ID_ARECA 0x17d3 -- cgit v1.3-8-gc7d7 From a5a19c63f4e55e32dc0bc3d936d7f94793d8b380 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 30 Jan 2008 13:33:39 +0100 Subject: x86: demacro asm-x86/pgalloc_32.h Convert macros into inline functions, for better type-checking. This patch required a little bit of fiddling with headers in order to make __(pte|pmd)_free_tlb inline rather than macros. asm-generic/tlb.h includes asm/pgalloc.h, though it doesn't directly use any pgalloc definitions. I removed this include to avoid an include cycle, but it may cause secondary compile failures by things depending on the indirect inclusion; arch/x86/mm/hugetlbpage.c was one such place; there may be others. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/hugetlbpage.c | 3 +- arch/x86/mm/init_32.c | 1 + include/asm-generic/tlb.h | 1 - include/asm-x86/pgalloc_32.h | 61 +++++++++++++++++++++++++--------------- include/asm-x86/pgtable-3level.h | 2 -- include/linux/swap.h | 1 + 6 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 6c06d9c0488e..4fbafb4bc2f0 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -15,6 +15,7 @@ #include #include #include +#include static unsigned long page_table_shareable(struct vm_area_struct *svma, struct vm_area_struct *vma, @@ -88,7 +89,7 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) spin_lock(&mm->page_table_lock); if (pud_none(*pud)) - pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK); + pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); else put_page(virt_to_page(spte)); spin_unlock(&mm->page_table_lock); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 98d2acae4f64..459b384acda9 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 75f2bfab614f..6ce9f3ab928d 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -15,7 +15,6 @@ #include #include -#include #include /* diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h index fbc6357f5eba..3482c3427897 100644 --- a/include/asm-x86/pgalloc_32.h +++ b/include/asm-x86/pgalloc_32.h @@ -3,6 +3,8 @@ #include #include /* for struct page */ +#include +#include #ifdef CONFIG_PARAVIRT #include @@ -14,19 +16,20 @@ #define paravirt_release_pd(pfn) do { } while (0) #endif -#define pmd_populate_kernel(mm, pmd, pte) \ -do { \ - paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ -} while (0) +static inline void pmd_populate_kernel(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} -#define pmd_populate(mm, pmd, pte) \ -do { \ - paravirt_alloc_pt(mm, page_to_pfn(pte)); \ - set_pmd(pmd, __pmd(_PAGE_TABLE + \ - ((unsigned long long)page_to_pfn(pte) << \ - (unsigned long long) PAGE_SHIFT))); \ -} while (0) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) +{ + unsigned long pfn = page_to_pfn(pte); + + paravirt_alloc_pt(mm, pfn); + set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); +} /* * Allocate and free page tables. @@ -48,20 +51,34 @@ static inline void pte_free(struct page *pte) } -#define __pte_free_tlb(tlb,pte) \ -do { \ - paravirt_release_pt(page_to_pfn(pte)); \ - tlb_remove_page((tlb),(pte)); \ -} while (0) +static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) +{ + paravirt_release_pt(page_to_pfn(pte)); + tlb_remove_page(tlb, pte); +} #ifdef CONFIG_X86_PAE /* * In the PAE case we free the pmds as part of the pgd. */ -#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -#define pmd_free(x) do { } while (0) -#define __pmd_free_tlb(tlb,x) do { } while (0) -#define pud_populate(mm, pmd, pte) BUG() -#endif +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + BUG(); + return (pmd_t *)2; +} + +static inline void pmd_free(pmd_t *pmd) +{ +} + +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +{ +} + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + BUG(); +} +#endif /* CONFIG_X86_PAE */ #endif /* _I386_PGALLOC_H */ diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h index 5af7a44ed902..62bb06575d5a 100644 --- a/include/asm-x86/pgtable-3level.h +++ b/include/asm-x86/pgtable-3level.h @@ -157,6 +157,4 @@ static inline unsigned long pte_pfn(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) -#define __pmd_free_tlb(tlb, x) do { } while (0) - #endif /* _I386_PGTABLE_3LEVEL_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 4f3838adbb30..2c3ce4c69b25 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include -- cgit v1.3-8-gc7d7 From 12d6f21eacc21d84a809829543f2fe45c7e37319 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:33:58 +0100 Subject: x86: do not PSE on CONFIG_DEBUG_PAGEALLOC=y get more testing of the c_p_a() code done by not turning off PSE on DEBUG_PAGEALLOC. this simplifies the early pagetable setup code, and tests the largepage-splitup code quite heavily. In the end, all the largepages will be split up pretty quickly, so there's no difference to how DEBUG_PAGEALLOC worked before. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 7 ------- arch/x86/mm/pageattr_32.c | 12 +++++++++++- include/asm-x86/cacheflush.h | 5 ----- include/linux/mm.h | 14 +++++++++++++- init/main.c | 5 +++++ 5 files changed, 29 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bba850b05d0e..db28aa9e2f69 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -641,13 +641,6 @@ void __init early_cpu_init(void) nexgen_init_cpu(); umc_init_cpu(); early_cpu_detect(); - -#ifdef CONFIG_DEBUG_PAGEALLOC - /* pse is not compatible with on-the-fly unmapping, - * disable it even if the cpus claim to support it. - */ - setup_clear_cpu_cap(X86_FEATURE_PSE); -#endif } /* Make sure %fs is initialized properly in idle threads */ diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c index 9cf2fea54eb5..dd49b16b3a0e 100644 --- a/arch/x86/mm/pageattr_32.c +++ b/arch/x86/mm/pageattr_32.c @@ -61,13 +61,17 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) static int split_large_page(pte_t *kpte, unsigned long address) { pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); + gfp_t gfp_flags = GFP_KERNEL; unsigned long flags; unsigned long addr; pte_t *pbase, *tmp; struct page *base; int i, level; - base = alloc_pages(GFP_KERNEL, 0); +#ifdef CONFIG_DEBUG_PAGEALLOC + gfp_flags = GFP_ATOMIC; +#endif + base = alloc_pages(gfp_flags, 0); if (!base) return -ENOMEM; @@ -218,6 +222,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable) numpages * PAGE_SIZE); } + /* + * If page allocator is not up yet then do not call c_p_a(): + */ + if (!debug_pagealloc_enabled) + return; + /* * the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 9411a2d3f19c..fccb563e2305 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -29,11 +29,6 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot); int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); void clflush_cache_range(void *addr, int size); -#ifdef CONFIG_DEBUG_PAGEALLOC -/* internal debugging function */ -void kernel_map_pages(struct page *page, int numpages, int enable); -#endif - #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c22d971afa7..1bba6789a50a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1118,9 +1118,21 @@ static inline void vm_stat_account(struct mm_struct *mm, } #endif /* CONFIG_PROC_FS */ -#ifndef CONFIG_DEBUG_PAGEALLOC +#ifdef CONFIG_DEBUG_PAGEALLOC +extern int debug_pagealloc_enabled; + +extern void kernel_map_pages(struct page *page, int numpages, int enable); + +static inline void enable_debug_pagealloc(void) +{ + debug_pagealloc_enabled = 1; +} +#else static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} +static inline void enable_debug_pagealloc(void) +{ +} #endif extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); diff --git a/init/main.c b/init/main.c index 3316dffe3e57..cb81ed116f62 100644 --- a/init/main.c +++ b/init/main.c @@ -318,6 +318,10 @@ static int __init unknown_bootoption(char *param, char *val) return 0; } +#ifdef CONFIG_DEBUG_PAGEALLOC +int __read_mostly debug_pagealloc_enabled = 0; +#endif + static int __init init_setup(char *str) { unsigned int i; @@ -552,6 +556,7 @@ asmlinkage void __init start_kernel(void) preempt_disable(); build_all_zonelists(); page_alloc_init(); + enable_debug_pagealloc(); printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); parse_early_param(); parse_args("Booting kernel", static_command_line, __start___param, -- cgit v1.3-8-gc7d7 From f212ec4b7b4d84290f12c9c0416cdea283bf5f40 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Wed, 30 Jan 2008 13:34:11 +0100 Subject: x86: early boot debugging via FireWire (ohci1394_dma=early) This patch adds a new configuration option, which adds support for a new early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch() to decide wether OHCI-1394 FireWire controllers should be initialized and enabled for physical DMA access to allow remote debugging of early problems like issues ACPI or other subsystems which are executed very early. If the config option is not enabled, no code is changed, and if the boot paramenter is not given, no new code is executed, and independent of that, all new code is freed after boot, so the config option can be even enabled in standard, non-debug kernels. With specialized tools, it is then possible to get debugging information from machines which have no serial ports (notebooks) such as the printk buffer contents, or any data which can be referenced from global pointers, if it is stored below the 4GB limit and even memory dumps of of the physical RAM region below the 4GB limit can be taken without any cooperation from the CPU of the host, so the machine can be crashed early, it does not matter. In the extreme, even kernel debuggers can be accessed in this way. I wrote a small kgdb module and an accompanying gdb stub for FireWire which allows to gdb to talk to kgdb using remote remory reads and writes over FireWire. An version of the gdb stub fore FireWire is able to read all global data from a system which is running a a normal kernel without any kernel debugger, without any interruption or support of the system's CPU. That way, e.g. the task struct and so on can be read and even manipulated when the physical DMA access is granted. A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt and I've put a copy online at ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt It also has links to all the tools which are available to make use of it another copy of it is online at: ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff Signed-Off-By: Bernhard Kaindl Tested-By: Thomas Renninger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/debugging-via-ohci1394.txt | 179 +++++++++++++++++++ arch/x86/kernel/setup_32.c | 11 ++ arch/x86/kernel/setup_64.c | 11 ++ drivers/Makefile | 2 +- drivers/ieee1394/Makefile | 1 + drivers/ieee1394/init_ohci1394_dma.c | 285 +++++++++++++++++++++++++++++++ include/asm-x86/fixmap_32.h | 3 + include/asm-x86/fixmap_64.h | 3 + include/linux/init_ohci1394_dma.h | 4 + lib/Kconfig.debug | 28 +++ 10 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 Documentation/debugging-via-ohci1394.txt create mode 100644 drivers/ieee1394/init_ohci1394_dma.c create mode 100644 include/linux/init_ohci1394_dma.h (limited to 'include/linux') diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt new file mode 100644 index 000000000000..de4804e8b396 --- /dev/null +++ b/Documentation/debugging-via-ohci1394.txt @@ -0,0 +1,179 @@ + + Using physical DMA provided by OHCI-1394 FireWire controllers for debugging + --------------------------------------------------------------------------- + +Introduction +------------ + +Basically all FireWire controllers which are in use today are compliant +to the OHCI-1394 specification which defines the controller to be a PCI +bus master which uses DMA to offload data transfers from the CPU and has +a "Physical Response Unit" which executes specific requests by employing +PCI-Bus master DMA after applying filters defined by the OHCI-1394 driver. + +Once properly configured, remote machines can send these requests to +ask the OHCI-1394 controller to perform read and write requests on +physical system memory and, for read requests, send the result of +the physical memory read back to the requester. + +With that, it is possible to debug issues by reading interesting memory +locations such as buffers like the printk buffer or the process table. + +Retrieving a full system memory dump is also possible over the FireWire, +using data transfer rates in the order of 10MB/s or more. + +Memory access is currently limited to the low 4G of physical address +space which can be a problem on IA64 machines where memory is located +mostly above that limit, but it is rarely a problem on more common +hardware such as hardware based on x86, x86-64 and PowerPC. + +Together with a early initialization of the OHCI-1394 controller for debugging, +this facility proved most useful for examining long debugs logs in the printk +buffer on to debug early boot problems in areas like ACPI where the system +fails to boot and other means for debugging (serial port) are either not +available (notebooks) or too slow for extensive debug information (like ACPI). + +Drivers +------- + +The OHCI-1394 drivers in drivers/firewire and drivers/ieee1394 initialize +the OHCI-1394 controllers to a working state and can be used to enable +physical DMA. By default you only have to load the driver, and physical +DMA access will be granted to all remote nodes, but it can be turned off +when using the ohci1394 driver. + +Because these drivers depend on the PCI enumeration to be completed, an +initialization routine which can runs pretty early (long before console_init(), +which makes the printk buffer appear on the console can be called) was written. + +To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu: +Provide code for enabling DMA over FireWire early on boot) and pass the +parameter "ohci1394_dma=early" to the recompiled kernel on boot. + +Tools +----- + +firescope - Originally developed by Benjamin Herrenschmidt, Andi Kleen ported +it from PowerPC to x86 and x86_64 and added functionality, firescope can now +be used to view the printk buffer of a remote machine, even with live update. + +Bernhard Kaindl enhanced firescope to support accessing 64-bit machines +from 32-bit firescope and vice versa: +- ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2 + +and he implemented fast system dump (alpha version - read README.txt): +- ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2 + +There is also a gdb proxy for firewire which allows to use gdb to access +data which can be referenced from symbols found by gdb in vmlinux: +- ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2 + +The latest version of this gdb proxy (fireproxy-0.34) can communicate (not +yet stable) with kgdb over an memory-based communication module (kgdbom). + +Getting Started +--------------- + +The OHCI-1394 specification regulates that the OHCI-1394 controller must +disable all physical DMA on each bus reset. + +This means that if you want to debug an issue in a system state where +interrupts are disabled and where no polling of the OHCI-1394 controller +for bus resets takes place, you have to establish any FireWire cable +connections and fully initialize all FireWire hardware __before__ the +system enters such state. + +Step-by-step instructions for using firescope with early OHCI initialization: + +1) Verify that your hardware is supported: + + Load the ohci1394 or the fw-ohci module and check your kernel logs. + You should see a line similar to + + ohci1394: fw-host0: OHCI-1394 1.1 (PCI): IRQ=[18] MMIO=[fe9ff800-fe9fffff] + ... Max Packet=[2048] IR/IT contexts=[4/8] + + when loading the driver. If you have no supported controller, many PCI, + CardBus and even some Express cards which are fully compliant to OHCI-1394 + specification are available. If it requires no driver for Windows operating + systems, it most likely is. Only specialized shops have cards which are not + compliant, they are based on TI PCILynx chips and require drivers for Win- + dows operating systems. + +2) Establish a working FireWire cable connection: + + Any FireWire cable, as long at it provides electrically and mechanically + stable connection and has matching connectors (there are small 4-pin and + large 6-pin FireWire ports) will do. + + If an driver is running on both machines you should see a line like + + ieee1394: Node added: ID:BUS[0-01:1023] GUID[0090270001b84bba] + + on both machines in the kernel log when the cable is plugged in + and connects the two machines. + +3) Test physical DMA using firescope: + + On the debug host, + - load the raw1394 module, + - make sure that /dev/raw1394 is accessible, + then start firescope: + + $ firescope + Port 0 (ohci1394) opened, 2 nodes detected + + FireScope + --------- + Target : + Gen : 1 + [Ctrl-T] choose target + [Ctrl-H] this menu + [Ctrl-Q] quit + + ------> Press Ctrl-T now, the output should be similar to: + + 2 nodes available, local node is: 0 + 0: ffc0, uuid: 00000000 00000000 [LOCAL] + 1: ffc1, uuid: 00279000 ba4bb801 + + Besides the [LOCAL] node, it must show another node without error message. + +4) Prepare for debugging with early OHCI-1394 initialization: + + 4.1) Kernel compilation and installation on debug target + + Compile the kernel to be debugged with CONFIG_PROVIDE_OHCI1394_DMA_INIT + (Kernel hacking: Provide code for enabling DMA over FireWire early on boot) + enabled and install it on the machine to be debugged (debug target). + + 4.2) Transfer the System.map of the debugged kernel to the debug host + + Copy the System.map of the kernel be debugged to the debug host (the host + which is connected to the debugged machine over the FireWire cable). + +5) Retrieving the printk buffer contents: + + With the FireWire cable connected, the OHCI-1394 driver on the debugging + host loaded, reboot the debugged machine, booting the kernel which has + CONFIG_PROVIDE_OHCI1394_DMA_INIT enabled, with the option ohci1394_dma=early. + + Then, on the debugging host, run firescope, for example by using -A: + + firescope -A System.map-of-debug-target-kernel + + Note: -A automatically attaches to the first non-local node. It only works + reliably if only connected two machines are connected using FireWire. + + After having attached to the debug target, press Ctrl-D to view the + complete printk buffer or Ctrl-U to enter auto update mode and get an + updated live view of recent kernel messages logged on the debug target. + + Call "firescope -h" to get more information on firescope's options. + +Notes +----- +Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs + +FireWire is a trademark of Apple Inc. - for more information please refer to: +http://en.wikipedia.org/wiki/FireWire diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 9c0ef4945a58..62adc5f20be5 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -45,6 +45,7 @@ #include #include #include +#include #include