From 7b7232f3fb5ecd7c30cb52df368070cc5f5ca614 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Feb 2006 21:06:49 -0600 Subject: [SCSI] iscsi update: cleanup iscsi class interface From: michaelc@cs.wisc.edu fujita.tomonori@lab.ntt.co.jp da-x@monatomic.org and err path fixup from: ogerlitz@voltaire.com This patch cleans up that interface by having the lld and class pass a iscsi_cls_session or iscsi_cls_conn between each other when the function is used by HW and SW iscsi llds. This way the lld does not have to remember if it has to send a handle or pointer and a handle or pointer to connection, session or host. This also has the class verify the session handle that gets passed from userspace instead of using the pointer passed into the kernel directly. Signed-off-by: Mike Christie Signed-off-by: Alex Aizman Signed-off-by: Dmitry Yusupov Signed-off-by: James Bottomley --- include/scsi/iscsi_if.h | 3 --- include/scsi/scsi_transport_iscsi.h | 34 ++++++++++++++++++---------------- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index 3e5cb5ab2d34..e5618b90996e 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -163,9 +163,6 @@ enum iscsi_param { }; #define ISCSI_PARAM_MAX 14 -typedef uint64_t iscsi_sessionh_t; /* iSCSI Data-Path session handle */ -typedef uint64_t iscsi_connh_t; /* iSCSI Data-Path connection handle */ - #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle) #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr) #define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata)) diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 16602a547a63..b41cf077e54b 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -63,25 +63,28 @@ struct iscsi_transport { int max_lun; unsigned int max_conn; unsigned int max_cmd_len; - struct Scsi_Host *(*create_session) (struct scsi_transport_template *t, - uint32_t initial_cmdsn); - void (*destroy_session) (struct Scsi_Host *shost); - struct iscsi_cls_conn *(*create_conn) (struct Scsi_Host *shost, + struct iscsi_cls_session *(*create_session) + (struct scsi_transport_template *t, uint32_t sn, uint32_t *sid); + void (*destroy_session) (struct iscsi_cls_session *session); + struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess, uint32_t cid); - int (*bind_conn) (iscsi_sessionh_t session, iscsi_connh_t conn, + int (*bind_conn) (struct iscsi_cls_session *session, + struct iscsi_cls_conn *cls_conn, uint32_t transport_fd, int is_leading); - int (*start_conn) (iscsi_connh_t conn); - void (*stop_conn) (iscsi_connh_t conn, int flag); + int (*start_conn) (struct iscsi_cls_conn *conn); + void (*stop_conn) (struct iscsi_cls_conn *conn, int flag); void (*destroy_conn) (struct iscsi_cls_conn *conn); - int (*set_param) (iscsi_connh_t conn, enum iscsi_param param, + int (*set_param) (struct iscsi_cls_conn *conn, enum iscsi_param param, uint32_t value); - int (*get_conn_param) (void *conndata, enum iscsi_param param, + int (*get_conn_param) (struct iscsi_cls_conn *conn, + enum iscsi_param param, uint32_t *value); - int (*get_session_param) (struct Scsi_Host *shost, + int (*get_session_param) (struct iscsi_cls_session *session, enum iscsi_param param, uint32_t *value); - int (*send_pdu) (iscsi_connh_t conn, struct iscsi_hdr *hdr, + int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); - void (*get_stats) (iscsi_connh_t conn, struct iscsi_stats *stats); + void (*get_stats) (struct iscsi_cls_conn *conn, + struct iscsi_stats *stats); }; /* @@ -93,15 +96,14 @@ extern int iscsi_unregister_transport(struct iscsi_transport *tt); /* * control plane upcalls */ -extern void iscsi_conn_error(iscsi_connh_t conn, enum iscsi_err error); -extern int iscsi_recv_pdu(iscsi_connh_t conn, struct iscsi_hdr *hdr, +extern void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error); +extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ void *dd_data; /* LLD private data */ struct iscsi_transport *transport; - iscsi_connh_t connh; int active; /* must be accessed with the connlock */ struct device dev; /* sysfs transport/container device */ struct mempool_zone *z_error; @@ -113,7 +115,7 @@ struct iscsi_cls_conn { container_of(_dev, struct iscsi_cls_conn, dev) struct iscsi_cls_session { - struct list_head list; /* item in session_list */ + struct list_head sess_list; /* item in session_list */ struct iscsi_transport *transport; struct device dev; /* sysfs transport/container device */ }; -- cgit v1.3-6-gb490 From 9ed2ad8648eb974ee670045d41b5a51b763e3aa1 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Tue, 31 Jan 2006 14:26:25 -0800 Subject: [IA64] add syscall entry for *at() Wire up the ia64 syscalls for *at() functions. Signed-off-by: Ken Chen Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 13 +++++++++++++ arch/ia64/kernel/fsys.S | 29 +++-------------------------- include/asm-ia64/unistd.h | 13 +++++++++++++ 3 files changed, 29 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 7a6ffd613789..6b88de8d91f8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1601,5 +1601,18 @@ sys_call_table: data8 sys_inotify_add_watch data8 sys_inotify_rm_watch data8 sys_migrate_pages // 1280 + data8 sys_openat + data8 sys_mkdirat + data8 sys_mknodat + data8 sys_fchownat + data8 sys_futimesat // 1285 + data8 sys_newfstatat + data8 sys_unlinkat + data8 sys_renameat + data8 sys_linkat + data8 sys_symlinkat // 1290 + data8 sys_readlinkat + data8 sys_fchmodat + data8 sys_faccessat .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index ce423910ca97..ac6055c83115 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -878,31 +878,8 @@ fsyscall_table: data8 0 // timer_delete data8 0 // clock_settime data8 fsys_clock_gettime // clock_gettime - data8 0 // clock_getres // 1255 - data8 0 // clock_nanosleep - data8 0 // fstatfs64 - data8 0 // statfs64 - data8 0 - data8 0 // 1260 - data8 0 - data8 0 // mq_open - data8 0 // mq_unlink - data8 0 // mq_timedsend - data8 0 // mq_timedreceive // 1265 - data8 0 // mq_notify - data8 0 // mq_getsetattr - data8 0 // kexec_load - data8 0 - data8 0 // 1270 - data8 0 - data8 0 - data8 0 - data8 0 - data8 0 // 1275 - data8 0 - data8 0 - data8 0 - data8 0 - data8 0 // 1280 + #define __NR_syscall_last 1255 + + .space 8*(NR_syscalls + 1024 - __NR_syscall_last), 0 .org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 962f9bd1bdff..a151eb1fc73a 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -270,6 +270,19 @@ #define __NR_inotify_add_watch 1278 #define __NR_inotify_rm_watch 1279 #define __NR_migrate_pages 1280 +#define __NR_openat 1281 +#define __NR_mkdirat 1282 +#define __NR_mknodat 1283 +#define __NR_fchownat 1284 +#define __NR_futimesat 1285 +#define __NR_newfstatat 1286 +#define __NR_unlinkat 1287 +#define __NR_renameat 1288 +#define __NR_linkat 1289 +#define __NR_symlinkat 1290 +#define __NR_readlinkat 1291 +#define __NR_fchmodat 1292 +#define __NR_faccessat 1293 #ifdef __KERNEL__ -- cgit v1.3-6-gb490 From 9621a4ef8a29d11118f44def053931bcafb0dfc2 Mon Sep 17 00:00:00 2001 From: Janak Desai Date: Wed, 8 Feb 2006 15:43:38 -0800 Subject: [IA64] unshare system call registration for ia64 Registers system call for the ia64 architecture. Reserves space for ppoll and pselect, and adds unshare at system call number 1296. Signed-off-by: Janak Desai Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 3 +++ include/asm-ia64/unistd.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6b88de8d91f8..27b222c277e4 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1614,5 +1614,8 @@ sys_call_table: data8 sys_readlinkat data8 sys_fchmodat data8 sys_faccessat + data8 sys_ni_syscall // reserved for pselect + data8 sys_ni_syscall // 1295 reserved for ppoll + data8 sys_unshare .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index a151eb1fc73a..019956c613e4 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -283,12 +283,14 @@ #define __NR_readlinkat 1291 #define __NR_fchmodat 1292 #define __NR_faccessat 1293 +/* 1294, 1295 reserved for pselect/ppoll */ +#define __NR_unshare 1296 #ifdef __KERNEL__ #include -#define NR_syscalls 270 /* length of syscall table */ +#define NR_syscalls 273 /* length of syscall table */ #define __ARCH_WANT_SYS_RT_SIGACTION -- cgit v1.3-6-gb490 From 9ac95f2f90e022c16d293d7978faddf7e779a1a9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 9 Feb 2006 22:41:50 +0300 Subject: [PATCH] do_sigaction: cleanup ->sa_mask manipulation Clear unblockable signals beforehand. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- kernel/signal.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0cfcd1c7865e..9c1da0269a18 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1098,7 +1098,7 @@ extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(int, struct sigqueue *, struct task_struct *); extern int send_group_sigqueue(int, struct sigqueue *, struct task_struct *); -extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *); +extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); /* These can be the second arg to send_sig_info/send_group_sig_info. */ diff --git a/kernel/signal.c b/kernel/signal.c index 01a1e7f7acf7..ea154104a00b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2430,7 +2430,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) } int -do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) +do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct k_sigaction *k; sigset_t mask; @@ -2454,6 +2454,8 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) *oact = *k; if (act) { + sigdelsetmask(&act->sa.sa_mask, + sigmask(SIGKILL) | sigmask(SIGSTOP)); /* * POSIX 3.3.1.3: * "Setting a signal action to SIG_IGN for a signal that is @@ -2479,8 +2481,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) read_lock(&tasklist_lock); spin_lock_irq(&t->sighand->siglock); *k = *act; - sigdelsetmask(&k->sa.sa_mask, - sigmask(SIGKILL) | sigmask(SIGSTOP)); sigemptyset(&mask); sigaddset(&mask, sig); rm_from_queue_full(&mask, &t->signal->shared_pending); @@ -2495,8 +2495,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) } *k = *act; - sigdelsetmask(&k->sa.sa_mask, - sigmask(SIGKILL) | sigmask(SIGSTOP)); } spin_unlock_irq(¤t->sighand->siglock); -- cgit v1.3-6-gb490 From a70ea994a0d83fd0151a070be72b87d014ef0a7e Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Thu, 9 Feb 2006 16:40:11 -0800 Subject: [NETLINK]: Fix a severe bug netlink overrun was broken while improvement of netlink. Destination socket is used in the place where it was meant to be source socket, so that now overrun is never sent to user netlink sockets, when it should be, and it even can be set on kernel socket, which results in complete deadlock of rtnetlink. Suggested fix is to restore status quo passing source socket as additional argument to netlink_attachskb(). A little explanation: overrun is set on a socket, when it failed to receive some message and sender of this messages does not or even have no way to handle this error. This happens in two cases: 1. when kernel sends something. Kernel never retransmits and cannot wait for buffer space. 2. when user sends a broadcast and the message was not delivered to some recipients. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 ++- ipc/mqueue.c | 3 ++- net/netlink/af_netlink.c | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6a2ccf78a356..c256ebe2a7b4 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -160,7 +160,8 @@ extern int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ struct sock *netlink_getsockbyfilp(struct file *filp); -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo); +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + long timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 59302fc3643b..fd2e26b6f966 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1018,7 +1018,8 @@ retry: goto out; } - ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT); + ret = netlink_attachskb(sock, nc, 0, + MAX_SCHEDULE_TIMEOUT, NULL); if (ret == 1) goto retry; if (ret) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2101b45d2ec6..6b9772d95872 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -702,7 +702,8 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. */ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo) +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, + long timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -712,7 +713,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); if (!timeo) { - if (!nlk->pid) + if (!ssk || nlk_sk(ssk)->pid == 0) netlink_overrun(sk); sock_put(sk); kfree_skb(skb); @@ -797,7 +798,7 @@ retry: kfree_skb(skb); return PTR_ERR(sk); } - err = netlink_attachskb(sk, skb, nonblock, timeo); + err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); if (err == 1) goto retry; if (err) -- cgit v1.3-6-gb490 From d93077fb0e7cb9d4f4094a649501d840c55fdc8b Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 9 Feb 2006 16:58:46 -0800 Subject: [IRDA]: Set proper IrLAP device address length This patch set IrDA's addr_len properly, i.e to 4 bytes, the size of the IrLAP device address. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/net/irda/irlap.h | 3 +++ net/irda/irda_device.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index f55e86e75030..2127cae1e0a6 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -50,6 +50,9 @@ /* May be different when we get VFIR */ #define LAP_MAX_HEADER (LAP_ADDR_HEADER + LAP_CTRL_HEADER) +/* Each IrDA device gets a random 32 bits IRLAP device address */ +#define LAP_ALEN 4 + #define BROADCAST 0xffffffff /* Broadcast device address */ #define CBROADCAST 0xfe /* Connection broadcast address */ #define XID_FORMAT 0x01 /* Discovery XID format */ diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 890bac0d4a56..e3debbdb67f5 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -343,12 +343,12 @@ static void irda_task_timer_expired(void *data) static void irda_device_setup(struct net_device *dev) { dev->hard_header_len = 0; - dev->addr_len = 0; + dev->addr_len = LAP_ALEN; dev->type = ARPHRD_IRDA; dev->tx_queue_len = 8; /* Window size + 1 s-frame */ - memset(dev->broadcast, 0xff, 4); + memset(dev->broadcast, 0xff, LAP_ALEN); dev->mtu = 2048; dev->flags = IFF_NOARP; -- cgit v1.3-6-gb490 From b37ce281d729181b9862c4e3e112f9b5eea74ac9 Mon Sep 17 00:00:00 2001 From: JANAK DESAI Date: Tue, 7 Feb 2006 12:59:11 -0800 Subject: [PATCH] powerpc: unshare system call registration Registers system call for the powerpc architecture. Signed-off-by: Janak Desai Cc: Al Viro Cc: Christoph Hellwig Cc: Michael Kerrisk Cc: Benjamin Herrenschmidt Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/systbl.S | 1 + include/asm-powerpc/unistd.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 55b9fc10694e..8a9f994ed917 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -321,3 +321,4 @@ SYSCALL(spu_run) SYSCALL(spu_create) COMPAT_SYS(pselect6) COMPAT_SYS(ppoll) +SYSCALL(unshare) diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index a40cdff21a88..35556993f066 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -300,8 +300,9 @@ #define __NR_spu_create 279 #define __NR_pselect6 280 #define __NR_ppoll 281 +#define __NR_unshare 282 -#define __NR_syscalls 282 +#define __NR_syscalls 283 #ifdef __KERNEL__ #define __NR__exit __NR_exit -- cgit v1.3-6-gb490 From 9c15e852a524d55ab768cf48c97f5c684f876af2 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Fri, 10 Feb 2006 01:51:05 -0800 Subject: [PATCH] kexec: fix in free initrd when overlapped with crashkernel region It is possible that the reserved crashkernel region can be overlapped with initrd since the bootloader sets the initrd location. When the initrd region is freed, the second kernel memory will not be contiguous. The Kexec_load can cause an oops since there is no contiguous memory to write the second kernel or this memory could be used in the first kernel itself and may not be part of the dump. For example, on powerpc, the initrd is located at 36MB and the crashkernel starts at 32MB. The kexec_load caused panic since writing into non-allocated memory (after 36MB). We could see the similar issue even on other archs. One possibility is to move the initrd outside of crashkernel region. But, the initrd region will be freed anyway before the system is up. This patch fixes this issue and frees only regions that are not part of crashkernel memory in case overlaps. Signed-off-by: Haren Myneni Acked-by: "Eric W. Biederman" Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 1 + init/initramfs.c | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index a311f58c8a7c..cfb3410e32b1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -6,6 +6,7 @@ #include #include #include +#include #include /* Verify architecture specific macros are defined */ diff --git a/init/initramfs.c b/init/initramfs.c index 0c5d9a3f951b..637344b05981 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -466,10 +466,32 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) extern char __initramfs_start[], __initramfs_end[]; #ifdef CONFIG_BLK_DEV_INITRD #include +#include static void __init free_initrd(void) { - free_initrd_mem(initrd_start, initrd_end); +#ifdef CONFIG_KEXEC + unsigned long crashk_start = (unsigned long)__va(crashk_res.start); + unsigned long crashk_end = (unsigned long)__va(crashk_res.end); + + /* + * If the initrd region is overlapped with crashkernel reserved region, + * free only memory that is not part of crashkernel region. + */ + if (initrd_start < crashk_end && initrd_end > crashk_start) { + /* + * Initialize initrd memory region since the kexec boot does + * not do. + */ + memset((void *)initrd_start, 0, initrd_end - initrd_start); + if (initrd_start < crashk_start) + free_initrd_mem(initrd_start, crashk_start); + if (initrd_end > crashk_end) + free_initrd_mem(crashk_end, initrd_end); + } else +#endif + free_initrd_mem(initrd_start, initrd_end); + initrd_start = 0; initrd_end = 0; } -- cgit v1.3-6-gb490 From 7a8ef1cb774e5438d292365626f9b96616283706 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 10 Feb 2006 01:51:08 -0800 Subject: [PATCH] x86: don't initialise cpu_possible_map to all ones Initialising cpu_possible_map to all-ones with CONFIG_HOTPLUG_CPU means that a) All for_each_cpu() loops will iterate across all NR_CPUS CPUs, rather than over possible ones. That can be quite expensive. b) Soon we'll be allocating per-cpu areas only for possible CPUs. So with CPU_MASK_ALL, we'll be wasting memory. I also switched voyager over to not use CPU_MASK_ALL in the non-CPU-hotplug case. Should be OK.. I note that parisc is also using CPU_MASK_ALL. Suggest that it stop doing that. Cc: James Bottomley Cc: Kyle McMartin Cc: Paul Jackson Cc: Ashok Raj Cc: Zwane Mwaikambo Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 4 ---- arch/i386/mach-voyager/voyager_smp.c | 2 +- include/linux/cpumask.h | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 255adb498268..fb00ab7b7612 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -87,11 +87,7 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); -#ifdef CONFIG_HOTPLUG_CPU -cpumask_t cpu_possible_map = CPU_MASK_ALL; -#else cpumask_t cpu_possible_map; -#endif EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 72a1b9cae2e4..6e4c3baef6cc 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -240,7 +240,7 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; cpumask_t cpu_callin_map = CPU_MASK_NONE; cpumask_t cpu_callout_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_callout_map); -cpumask_t cpu_possible_map = CPU_MASK_ALL; +cpumask_t cpu_possible_map = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_possible_map); /* The per processor IRQ masks (these are usually kept in sync) */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 20b446f26ecd..60e56c6e03dd 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -328,7 +328,7 @@ static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, * bitmap of size NR_CPUS. * * #ifdef CONFIG_HOTPLUG_CPU - * cpu_possible_map - all NR_CPUS bits set + * cpu_possible_map - has bit 'cpu' set iff cpu is populatable * cpu_present_map - has bit 'cpu' set iff cpu is populated * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler * #else -- cgit v1.3-6-gb490 From 8977d929e49021d9a6e031310aab01fa72f849c2 Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Fri, 10 Feb 2006 01:51:14 -0800 Subject: [PATCH] tty buffering stall fix Prevent stalled processing of received data when a driver allocates tty buffer space but does not immediately follow the allocation with more data and a call to schedule receive tty processing. (example: hvc_console) This bug was introduced by the first locking patch for the new tty buffering. Signed-off-by: Paul Fulghum Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 30 ++++++++++++++++++++++-------- include/linux/kbd_kern.h | 4 +++- include/linux/tty.h | 2 ++ include/linux/tty_flip.h | 4 +++- 4 files changed, 30 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 076e07c1da38..a23816d3e9a1 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -268,6 +268,8 @@ static struct tty_buffer *tty_buffer_alloc(size_t size) p->size = size; p->next = NULL; p->active = 0; + p->commit = 0; + p->read = 0; p->char_buf_ptr = (char *)(p->data); p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size; /* printk("Flip create %p\n", p); */ @@ -298,6 +300,8 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size) *tbh = t->next; t->next = NULL; t->used = 0; + t->commit = 0; + t->read = 0; /* DEBUG ONLY */ memset(t->data, '*', size); /* printk("Flip recycle %p\n", t); */ @@ -335,6 +339,7 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size) if (b != NULL) { b->next = n; b->active = 0; + b->commit = b->used; } else tty->buf.head = n; tty->buf.tail = n; @@ -2752,6 +2757,9 @@ static void flush_to_ldisc(void *private_) unsigned long flags; struct tty_ldisc *disc; struct tty_buffer *tbuf; + int count; + char *char_buf; + unsigned char *flag_buf; disc = tty_ldisc_ref(tty); if (disc == NULL) /* !TTY_LDISC */ @@ -2765,16 +2773,20 @@ static void flush_to_ldisc(void *private_) goto out; } spin_lock_irqsave(&tty->buf.lock, flags); - while((tbuf = tty->buf.head) != NULL && !tbuf->active) { + while((tbuf = tty->buf.head) != NULL) { + while ((count = tbuf->commit - tbuf->read) != 0) { + char_buf = tbuf->char_buf_ptr + tbuf->read; + flag_buf = tbuf->flag_buf_ptr + tbuf->read; + tbuf->read += count; + spin_unlock_irqrestore(&tty->buf.lock, flags); + disc->receive_buf(tty, char_buf, flag_buf, count); + spin_lock_irqsave(&tty->buf.lock, flags); + } + if (tbuf->active) + break; tty->buf.head = tbuf->next; if (tty->buf.head == NULL) tty->buf.tail = NULL; - spin_unlock_irqrestore(&tty->buf.lock, flags); - /* printk("Process buffer %p for %d\n", tbuf, tbuf->used); */ - disc->receive_buf(tty, tbuf->char_buf_ptr, - tbuf->flag_buf_ptr, - tbuf->used); - spin_lock_irqsave(&tty->buf.lock, flags); tty_buffer_free(tty, tbuf); } spin_unlock_irqrestore(&tty->buf.lock, flags); @@ -2871,8 +2883,10 @@ void tty_flip_buffer_push(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->buf.lock, flags); - if (tty->buf.tail != NULL) + if (tty->buf.tail != NULL) { tty->buf.tail->active = 0; + tty->buf.tail->commit = tty->buf.tail->used; + } spin_unlock_irqrestore(&tty->buf.lock, flags); if (tty->low_latency) diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 3aed37314ab8..e87c32a5c86a 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -153,8 +153,10 @@ static inline void con_schedule_flip(struct tty_struct *t) { unsigned long flags; spin_lock_irqsave(&t->buf.lock, flags); - if (t->buf.tail != NULL) + if (t->buf.tail != NULL) { t->buf.tail->active = 0; + t->buf.tail->commit = t->buf.tail->used; + } spin_unlock_irqrestore(&t->buf.lock, flags); schedule_work(&t->buf.work); } diff --git a/include/linux/tty.h b/include/linux/tty.h index a7bd3b4558d2..f45cd74e6f24 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -58,6 +58,8 @@ struct tty_buffer { int used; int size; int active; + int commit; + int read; /* Data points here */ unsigned long data[0]; }; diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 82961eb19888..222faf97d5f9 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -29,8 +29,10 @@ _INLINE_ void tty_schedule_flip(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->buf.lock, flags); - if (tty->buf.tail != NULL) + if (tty->buf.tail != NULL) { tty->buf.tail->active = 0; + tty->buf.tail->commit = tty->buf.tail->used; + } spin_unlock_irqrestore(&tty->buf.lock, flags); schedule_delayed_work(&tty->buf.work, 1); } -- cgit v1.3-6-gb490 From e19816808346cc1619733532a267a11dce8f8a12 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 10 Feb 2006 22:40:51 +0000 Subject: [ARM] 3326/1: H1940 - Control latches Patch from Ben Dooks Define the bits for the two board control latches that control various items on the H1940 iPAQ. Signed-off-by: Ben Dooks Signed-off-by: Arnaud Patard Signed-off-by: Russell King --- arch/arm/mach-s3c2410/mach-h1940.c | 29 +++++++++++++- include/asm-arm/arch-s3c2410/h1940-latch.h | 64 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 include/asm-arm/arch-s3c2410/h1940-latch.h (limited to 'include') diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c index 1c316f14ed94..646a3a5d33a5 100644 --- a/arch/arm/mach-s3c2410/mach-h1940.c +++ b/arch/arm/mach-s3c2410/mach-h1940.c @@ -46,10 +46,11 @@ #include #include -//#include + #include #include +#include #include #include @@ -59,7 +60,12 @@ #include "cpu.h" static struct map_desc h1940_iodesc[] __initdata = { - /* nothing here yet */ + [0] = { + .virtual = (unsigned long)H1940_LATCH, + .pfn = __phys_to_pfn(H1940_PA_LATCH), + .length = SZ_16K, + .type = MT_DEVICE + }, }; #define UCON S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK @@ -92,6 +98,25 @@ static struct s3c2410_uartcfg h1940_uartcfgs[] = { } }; +/* Board control latch control */ + +static unsigned int latch_state = H1940_LATCH_DEFAULT; + +void h1940_latch_control(unsigned int clear, unsigned int set) +{ + unsigned long flags; + + local_irq_save(flags); + + latch_state &= ~clear; + latch_state |= set; + + __raw_writel(latch_state, H1940_LATCH); + + local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(h1940_latch_control); /** diff --git a/include/asm-arm/arch-s3c2410/h1940-latch.h b/include/asm-arm/arch-s3c2410/h1940-latch.h new file mode 100644 index 000000000000..c5802411f43d --- /dev/null +++ b/include/asm-arm/arch-s3c2410/h1940-latch.h @@ -0,0 +1,64 @@ +/* linux/include/asm-arm/arch-s3c2410/h1940-latch.h + * + * (c) 2005 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * iPAQ H1940 series - latch definitions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#ifndef __ASM_ARCH_H1940_LATCH_H +#define __ASM_ARCH_H1940_LATCH_H + + +#ifndef __ASSEMBLY__ +#define H1940_LATCH ((void __iomem *)0xF8000000) +#else +#define H1940_LATCH 0xF8000000 +#endif + +#define H1940_PA_LATCH (S3C2410_CS2) + +/* SD layer latch */ + +#define H1940_LATCH_SDQ1 (1<<16) +#define H1940_LATCH_LCD_P1 (1<<17) +#define H1940_LATCH_LCD_P2 (1<<18) +#define H1940_LATCH_LCD_P3 (1<<19) +#define H1940_LATCH_MAX1698_nSHUTDOWN (1<<20) /* LCD backlight */ +#define H1940_LATCH_LED_RED (1<<21) +#define H1940_LATCH_SDQ7 (1<<22) +#define H1940_LATCH_USB_DP (1<<23) + +/* CPU layer latch */ + +#define H1940_LATCH_UDA_POWER (1<<24) +#define H1940_LATCH_AUDIO_POWER (1<<25) +#define H1940_LATCH_SM803_ENABLE (1<<26) +#define H1940_LATCH_LCD_P4 (1<<27) +#define H1940_LATCH_CPUQ5 (1<<28) /* untraced */ +#define H1940_LATCH_BLUETOOTH_POWER (1<<29) /* active high */ +#define H1940_LATCH_LED_GREEN (1<<30) +#define H1940_LATCH_LED_FLASH (1<<31) + +/* default settings */ + +#define H1940_LATCH_DEFAULT \ + H1940_LATCH_LCD_P4 | \ + H1940_LATCH_SM803_ENABLE | \ + H1940_LATCH_SDQ1 | \ + H1940_LATCH_LCD_P1 | \ + H1940_LATCH_LCD_P2 | \ + H1940_LATCH_LCD_P3 | \ + H1940_LATCH_MAX1698_nSHUTDOWN | \ + H1940_LATCH_CPUQ5 + +/* control functions */ + +extern void h1940_latch_control(unsigned int clear, unsigned int set); + +#endif /* __ASM_ARCH_H1940_LATCH_H */ -- cgit v1.3-6-gb490 From cff2b760096d1e6feaa31948e7af4abbefe47822 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 11 Feb 2006 17:55:47 -0800 Subject: [PATCH] fstatat64 support The *at patches introduced fstatat and, due to inusfficient research, I used the newfstat functions generally as the guideline. The result is that on 32-bit platforms we don't have all the information needed to implement fstatat64. This patch modifies the code to pass up 64-bit information if __ARCH_WANT_STAT64 is defined. I renamed the syscall entry point to make this clear. Other archs will continue to use the existing code. On x86-64 the compat code is implemented using a new sys32_ function. this is what is done for the other stat syscalls as well. This patch might break some other archs (those which define __ARCH_WANT_STAT64 and which already wired up the syscall). Yet others might need changes to accomodate the compatibility mode. I really don't want to do that work because all this stat handling is a mess (more so in glibc, but the kernel is also affected). It should be done by the arch maintainers. I'll provide some stand-alone test shortly. Those who are eager could compile glibc and run 'make check' (no installation needed). The patch below has been tested on x86 and x86-64. Signed-off-by: Ulrich Drepper Cc: Christoph Hellwig Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 2 +- arch/x86_64/ia32/ia32entry.S | 2 +- arch/x86_64/ia32/sys_ia32.c | 22 ++++++++++++++++++++++ fs/stat.c | 22 ++++++++++++++++++++++ include/asm-i386/unistd.h | 2 +- include/asm-x86_64/ia32_unistd.h | 2 +- include/linux/syscalls.h | 2 ++ 7 files changed, 50 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 5a8b3fb6d27b..ac687d00a1ce 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -299,7 +299,7 @@ ENTRY(sys_call_table) .long sys_mknodat .long sys_fchownat .long sys_futimesat - .long sys_newfstatat /* 300 */ + .long sys_fstatat64 /* 300 */ .long sys_unlinkat .long sys_renameat .long sys_linkat diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index ada4535d0161..00dee176c08e 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -677,7 +677,7 @@ ia32_sys_call_table: .quad sys_mknodat .quad sys_fchownat .quad compat_sys_futimesat - .quad compat_sys_newfstatat /* 300 */ + .quad sys32_fstatat /* 300 */ .quad sys_unlinkat .quad sys_renameat .quad sys_linkat diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 54481af5344a..2bc55af95419 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -180,6 +180,28 @@ sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) return ret; } +asmlinkage long +sys32_fstatat(unsigned int dfd, char __user *filename, + struct stat64 __user* statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_stat64(statbuf, &stat); + +out: + return error; +} + /* * Linux/i386 didn't use to be able to handle more than * 4 system call parameters, so these system calls used a memory diff --git a/fs/stat.c b/fs/stat.c index 24211b030f39..9948cc1685a4 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -261,6 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf) return error; } +#ifndef __ARCH_WANT_STAT64 asmlinkage long sys_newfstatat(int dfd, char __user *filename, struct stat __user *statbuf, int flag) { @@ -281,6 +282,7 @@ asmlinkage long sys_newfstatat(int dfd, char __user *filename, out: return error; } +#endif asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) { @@ -395,6 +397,26 @@ asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user * statbuf) return error; } +asmlinkage long sys_fstatat64(int dfd, char __user *filename, + struct stat64 __user *statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_new_stat64(&stat, statbuf); + +out: + return error; +} #endif /* __ARCH_WANT_STAT64 */ void inode_add_bytes(struct inode *inode, loff_t bytes) diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index cf6f2cd9c514..dc81a55dd94d 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -305,7 +305,7 @@ #define __NR_mknodat 297 #define __NR_fchownat 298 #define __NR_futimesat 299 -#define __NR_newfstatat 300 +#define __NR_fstatat64 300 #define __NR_unlinkat 301 #define __NR_renameat 302 #define __NR_linkat 303 diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h index 20468983d453..eeb2bcd635de 100644 --- a/include/asm-x86_64/ia32_unistd.h +++ b/include/asm-x86_64/ia32_unistd.h @@ -305,7 +305,7 @@ #define __NR_ia32_mknodat 297 #define __NR_ia32_fchownat 298 #define __NR_ia32_futimesat 299 -#define __NR_ia32_newfstatat 300 +#define __NR_ia32_fstatat64 300 #define __NR_ia32_unlinkat 301 #define __NR_ia32_renameat 302 #define __NR_ia32_linkat 303 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3877209d23c3..d73501ba7e44 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -557,6 +557,8 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, int mode); asmlinkage long sys_newfstatat(int dfd, char __user *filename, struct stat __user *statbuf, int flag); +asmlinkage long sys_fstatat64(int dfd, char __user *filename, + struct stat64 __user *statbuf, int flag); asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, int bufsiz); asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, -- cgit v1.3-6-gb490 From 33042a9ff4d126ba944b9dc3076665a2029e0a34 Mon Sep 17 00:00:00 2001 From: Chris McDermott Date: Sat, 11 Feb 2006 17:55:50 -0800 Subject: [PATCH] x86-64: Fix HPET timer on x460 [description from AK] The IBM Summit 3 chipset doesn't implement the HPET timer replacement option. Since the current Linux code relies on it use a mixed mode with both PIT for the interrupt and HPET counters for the time keeping. That was already implemented, but didn't work properly because it was still using the last interrupt offset in HPET. This resulted in x460 not booting. Fix this up by using the free running HPET counter. Shouldn't affect any other machine because they either use full HPET mode or no HPET at all. TBD needs a similar 32bit fix. Signed-off-by: Andi Kleen Cc: Pallipadi, Venkatesh" Cc: Bob Picco Cc: Bjorn Helgaas Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 2 +- arch/x86_64/kernel/time.c | 12 +++++++++--- include/asm-x86_64/hpet.h | 2 ++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 6147770b4347..7a0a3e8d5d72 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -708,7 +708,7 @@ static void setup_APIC_timer(unsigned int clocks) local_irq_save(flags); /* wait for irq slice */ - if (vxtime.hpet_address) { + if (vxtime.hpet_address && hpet_use_timer) { int trigger = hpet_readl(HPET_T0_CMP); while (hpet_readl(HPET_COUNTER) >= trigger) /* do nothing */ ; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index dba7237be5c1..3c58c30506a1 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -59,7 +59,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ -static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ +int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -326,7 +326,10 @@ static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) print_symbol("rip %s\n", regs->rip); if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { printk(KERN_WARNING "Falling back to HPET\n"); - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; } @@ -988,7 +991,10 @@ void __init time_init_gtod(void) notsc = 1; if (vxtime.hpet_address && notsc) { timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; #ifdef CONFIG_X86_PM_TIMER diff --git a/include/asm-x86_64/hpet.h b/include/asm-x86_64/hpet.h index c20c28f5c7a0..08b75c15269a 100644 --- a/include/asm-x86_64/hpet.h +++ b/include/asm-x86_64/hpet.h @@ -55,6 +55,8 @@ extern int is_hpet_enabled(void); extern int hpet_rtc_timer_init(void); extern int oem_force_hpet_timer(void); +extern int hpet_use_timer; + #ifdef CONFIG_HPET_EMULATE_RTC extern int hpet_mask_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); -- cgit v1.3-6-gb490 From 643a654540579b0dcc7a206a4a7475276a41aff0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 11 Feb 2006 17:55:52 -0800 Subject: [PATCH] select: fix returned timeval With David Woodhouse select() presently has a habit of increasing the value of the user's `timeout' argument on return. We were writing back a timeout larger than the original. We _deliberately_ round up, since we know we must wait at _least_ as long as the caller asks us to. The patch adds a couple of helper functions for magnitude comparison of timespecs and of timevals, and uses them to prevent the various poll and select functions from returning a timeout which is larger than the one which was passed in. The patch also fixes a bug in compat_sys_pselect7(): it was adding the new timeout value to the old one and was returning that. It should just return the new timeout value. (We have various handy timespec/timeval-to-from-nsec conversion functions in time.h. But this code open-codes it all). Cc: "David S. Miller" Cc: Andi Kleen Cc: Ulrich Drepper Cc: Thomas Gleixner Cc: george anzinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 37 +++++++++++++++++++++++++------------ fs/select.c | 32 +++++++++++++++++++++++--------- include/linux/compat.h | 20 ++++++++++++++++++++ include/linux/time.h | 25 ++++++++++++++++++++++++- 4 files changed, 92 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/compat.c b/fs/compat.c index 70c5af4cc270..a2ba78bdf7f7 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1751,11 +1751,15 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, ret = compat_core_sys_select(n, inp, outp, exp, &timeout); if (tvp) { + struct compat_timeval rtv; + if (current->personality & STICKY_TIMEOUTS) goto sticky; - tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - tv.tv_sec = timeout; - if (copy_to_user(tvp, &tv, sizeof(tv))) { + rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); + rtv.tv_sec = timeout; + if (compat_timeval_compare(&rtv, &tv) < 0) + rtv = tv; + if (copy_to_user(tvp, &rtv, sizeof(rtv))) { sticky: /* * If an application puts its timeval in read-only @@ -1822,13 +1826,17 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); if (tsp && !(current->personality & STICKY_TIMEOUTS)) { - ts.tv_sec += timeout / HZ; - ts.tv_nsec += (timeout % HZ) * (1000000000/HZ); - if (ts.tv_nsec >= 1000000000) { - ts.tv_sec++; - ts.tv_nsec -= 1000000000; + struct compat_timespec rts; + + rts.tv_sec = timeout / HZ; + rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ); + if (rts.tv_nsec >= NSEC_PER_SEC) { + rts.tv_sec++; + rts.tv_nsec -= NSEC_PER_SEC; } - (void)copy_to_user(tsp, &ts, sizeof(ts)); + if (compat_timespec_compare(&rts, &ts) < 0) + rts = ts; + copy_to_user(tsp, &rts, sizeof(rts)); } if (ret == -ERESTARTNOHAND) { @@ -1918,12 +1926,17 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, sigprocmask(SIG_SETMASK, &sigsaved, NULL); if (tsp && timeout >= 0) { + struct compat_timespec rts; + if (current->personality & STICKY_TIMEOUTS) goto sticky; /* Yes, we know it's actually an s64, but it's also positive. */ - ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; - ts.tv_sec = timeout; - if (copy_to_user(tsp, &ts, sizeof(ts))) { + rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * + 1000; + rts.tv_sec = timeout; + if (compat_timespec_compare(&rts, &ts) < 0) + rts = ts; + if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: /* * If an application puts its timeval in read-only diff --git a/fs/select.c b/fs/select.c index bc60a3e14ef3..6ce68a9c8976 100644 --- a/fs/select.c +++ b/fs/select.c @@ -398,11 +398,15 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, ret = core_sys_select(n, inp, outp, exp, &timeout); if (tvp) { + struct timeval rtv; + if (current->personality & STICKY_TIMEOUTS) goto sticky; - tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - tv.tv_sec = timeout; - if (copy_to_user(tvp, &tv, sizeof(tv))) { + rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); + rtv.tv_sec = timeout; + if (timeval_compare(&rtv, &tv) < 0) + rtv = tv; + if (copy_to_user(tvp, &rtv, sizeof(rtv))) { sticky: /* * If an application puts its timeval in read-only @@ -460,11 +464,16 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, ret = core_sys_select(n, inp, outp, exp, &timeout); if (tsp) { + struct timespec rts; + if (current->personality & STICKY_TIMEOUTS) goto sticky; - ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; - ts.tv_sec = timeout; - if (copy_to_user(tsp, &ts, sizeof(ts))) { + rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * + 1000; + rts.tv_sec = timeout; + if (timespec_compare(&rts, &ts) < 0) + rts = ts; + if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: /* * If an application puts its timeval in read-only @@ -758,12 +767,17 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, sigprocmask(SIG_SETMASK, &sigsaved, NULL); if (tsp && timeout >= 0) { + struct timespec rts; + if (current->personality & STICKY_TIMEOUTS) goto sticky; /* Yes, we know it's actually an s64, but it's also positive. */ - ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; - ts.tv_sec = timeout; - if (copy_to_user(tsp, &ts, sizeof(ts))) { + rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * + 1000; + rts.tv_sec = timeout; + if (timespec_compare(&rts, &ts) < 0) + rts = ts; + if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: /* * If an application puts its timeval in read-only diff --git a/include/linux/compat.h b/include/linux/compat.h index f9ca534787e2..c9ab2a26348c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -161,5 +161,25 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +static inline int compat_timeval_compare(struct compat_timeval *lhs, + struct compat_timeval *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_usec - rhs->tv_usec; +} + +static inline int compat_timespec_compare(struct compat_timespec *lhs, + struct compat_timespec *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - rhs->tv_nsec; +} + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/time.h b/include/linux/time.h index 7b4dc36532bb..d9cdba54b789 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -33,11 +33,34 @@ struct timezone { #define NSEC_PER_SEC 1000000000L #define NSEC_PER_USEC 1000L -static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) +static inline int timespec_equal(struct timespec *a, struct timespec *b) { return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } +/* + * lhs < rhs: return <0 + * lhs == rhs: return 0 + * lhs > rhs: return >0 + */ +static inline int timespec_compare(struct timespec *lhs, struct timespec *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - rhs->tv_nsec; +} + +static inline int timeval_compare(struct timeval *lhs, struct timeval *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_usec - rhs->tv_usec; +} + extern unsigned long mktime(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec); -- cgit v1.3-6-gb490 From a386fba2516b5404864647906219ced57bf2f2b7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 11 Feb 2006 17:56:01 -0800 Subject: [PATCH] s390: fix non smp build of kexec Add missing smp_cpu_not_running define to avoid build warnings in the non smp case. Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/machine_kexec.c | 5 +++-- include/asm-s390/smp.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index f0ed5c642c74..bad81b5832db 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -12,15 +12,16 @@ * on the S390 architecture. */ -#include -#include #include #include #include #include +#include +#include #include #include #include +#include static void kexec_halt_all_cpus(void *); diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h index a2ae7628bbaa..9c6e9c300eb9 100644 --- a/include/asm-s390/smp.h +++ b/include/asm-s390/smp.h @@ -101,6 +101,7 @@ smp_call_function_on(void (*func) (void *info), void *info, func(info); return 0; } +#define smp_cpu_not_running(cpu) 1 #define smp_get_cpu(cpu) ({ 0; }) #define smp_put_cpu(cpu) ({ 0; }) #endif -- cgit v1.3-6-gb490 From e7684277f6882a23cfb734cb7450c3a469e6e8b1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 11 Feb 2006 17:56:02 -0800 Subject: [PATCH] s390: add support for unshare system call Add support for unshare system call. Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/compat_wrapper.S | 5 +++++ arch/s390/kernel/syscalls.S | 1 + include/asm-s390/unistd.h | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 83b33fe1923c..38a6ef5ec624 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1602,3 +1602,8 @@ compat_sys_ppoll_wrapper: llgtr %r5,%r5 # const sigset_t * llgfr %r6,%r6 # size_t jg compat_sys_ppoll + + .globl sys_unshare_wrapper +sys_unshare_wrapper: + llgfr %r2,%r2 # unsigned long + jg sys_unshare diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 3280345efacd..e86a4debb160 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -311,3 +311,4 @@ SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper) SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */ SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) +SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper) diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index 29a9f357eb9e..0a2f6664c4cf 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -295,8 +295,9 @@ #define __NR_faccessat 300 #define __NR_pselect6 301 #define __NR_ppoll 302 +#define __NR_unshare 303 -#define NR_syscalls 303 +#define NR_syscalls 304 /* * There are some system calls that are not present on 64 bit, some -- cgit v1.3-6-gb490 From 0defa3c19e7792001df09d6fa5ab461d3599ff6d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 11 Feb 2006 17:56:03 -0800 Subject: [PATCH] s390: add #ifdef __KERNEL__ to asm-s390/setup.h Based on a patch from Maximilian Attems . Nothing in asm-s390/setup.h is of interest for user space. Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-s390/setup.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 348a88137445..da3fd4a7bb32 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -8,6 +8,8 @@ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H +#ifdef __KERNEL__ + #include #define PARMAREA 0x10400 @@ -114,7 +116,7 @@ extern u16 ipl_devno; IPL_PARMBLOCK_ORIGIN) #define IPL_PARMBLOCK_SIZE (IPL_PARMBLOCK_START->hdr.length) -#else +#else /* __ASSEMBLY__ */ #ifndef __s390x__ #define IPL_DEVICE 0x10404 @@ -127,6 +129,6 @@ extern u16 ipl_devno; #endif /* __s390x__ */ #define COMMAND_LINE 0x10480 -#endif - -#endif +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _ASM_S390_SETUP_H */ -- cgit v1.3-6-gb490 From ef1bea9e2a5a72d2c3362522e0a09099406732ff Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Sat, 11 Feb 2006 17:56:04 -0800 Subject: [PATCH] s390: remove one set of brackets in __constant_test_bit() Right now in __constant_test_bit for the s390 there is an extra set of () surrounding the calculation. This patch simply removes one set of () that is surrounding the whole clause. Signed-off-by: Eric Paris Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-s390/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h index 61232760cc3b..3628899f48bb 100644 --- a/include/asm-s390/bitops.h +++ b/include/asm-s390/bitops.h @@ -518,8 +518,8 @@ static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr static inline int __constant_test_bit(unsigned long nr, const volatile unsigned long *addr) { - return ((((volatile char *) addr) - [(nr^(__BITOPS_WORDSIZE-8))>>3] & (1<<(nr&7)))) != 0; + return (((volatile char *) addr) + [(nr^(__BITOPS_WORDSIZE-8))>>3] & (1<<(nr&7))) != 0; } #define test_bit(nr,addr) \ -- cgit v1.3-6-gb490 From bc7fc0601b3eb2254f080492f3fd69e319ed32d0 Mon Sep 17 00:00:00 2001 From: "Antonino A. Daplas" Date: Sat, 11 Feb 2006 17:56:07 -0800 Subject: [PATCH] nvidiafb: Add support for Geforce4 MX 4000 Add support for Geforce4 MX 4000 (0x185) Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/nvidia/nvidia.c | 2 ++ include/linux/pci_ids.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index dbcb8962e57d..a7c4e5e8ead6 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -138,6 +138,8 @@ static struct pci_device_id nvidiafb_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_420_8X, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_4000, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_448_GO, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_488_GO, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7a61ccdcbc4b..82b83da25d77 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1087,6 +1087,7 @@ #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440_8X 0x0181 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440SE_8X 0x0182 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_420_8X 0x0183 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_4000 0x0185 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_448_GO 0x0186 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_488_GO 0x0187 #define PCI_DEVICE_ID_NVIDIA_QUADRO4_580_XGL 0x0188 -- cgit v1.3-6-gb490 From 19bf9cbf6b313ae79a0c7278ccaa9c72c86931bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 12 Feb 2006 12:35:03 +0100 Subject: [PATCH] s390: fstatat64 support Add fstatat64 support to s390 in order to follow changes with commit cff2b760096d1e6feaa31948e7af4abbefe47822 . Also fixes compilation for 31 bit. Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- arch/s390/kernel/compat_linux.c | 20 ++++++++++++++++++++ arch/s390/kernel/compat_wrapper.S | 8 ++++---- arch/s390/kernel/syscalls.S | 2 +- include/asm-s390/unistd.h | 4 +++- 4 files changed, 28 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index cc20f0e3a7d3..2d021626c1a6 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -905,6 +905,26 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta return ret; } +asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, + struct stat64_emu31 __user* statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_stat64(statbuf, &stat); +out: + return error; +} + /* * Linux/i386 didn't use to be able to handle more than * 4 system call parameters, so these system calls used a memory diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 38a6ef5ec624..dd2d6c3e8df8 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1523,13 +1523,13 @@ compat_sys_futimesat_wrapper: llgtr %r4,%r4 # struct timeval * jg compat_sys_futimesat - .globl compat_sys_newfstatat_wrapper -compat_sys_newfstatat_wrapper: + .globl sys32_fstatat_wrapper +sys32_fstatat_wrapper: llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct stat * + llgtr %r4,%r4 # struct stat64 * lgfr %r5,%r5 # int - jg compat_sys_newfstatat + jg sys32_fstatat .globl sys_unlinkat_wrapper sys_unlinkat_wrapper: diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index e86a4debb160..84921fe8d266 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -301,7 +301,7 @@ SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) -SYSCALL(sys_newfstatat,sys_newfstatat,compat_sys_newfstatat_wrapper) +SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat_wrapper) SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index 0a2f6664c4cf..657d582e8149 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -285,7 +285,7 @@ #define __NR_mknodat 290 #define __NR_fchownat 291 #define __NR_futimesat 292 -#define __NR_newfstatat 293 +#define __NR_fstatat64 293 #define __NR_unlinkat 294 #define __NR_renameat 295 #define __NR_linkat 296 @@ -359,6 +359,7 @@ #undef __NR_fcntl64 #undef __NR_sendfile64 #undef __NR_fadvise64_64 +#undef __NR_fstatat64 #define __NR_select 142 #define __NR_getrlimit 191 /* SuS compliant getrlimit */ @@ -381,6 +382,7 @@ #define __NR_setgid 214 #define __NR_setfsuid 215 #define __NR_setfsgid 216 +#define __NR_newfstatat 293 #endif -- cgit v1.3-6-gb490 From 40ad7a6afc53217ad95b5ae2221e42d7655e057b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Feb 2006 23:30:11 -0800 Subject: [SPARC]: sys_newfstatat --> sys_fstatat64 Signed-off-by: David S. Miller --- arch/sparc/kernel/systbls.S | 2 +- arch/sparc64/kernel/sys_sparc32.c | 21 +++++++++++++++++++++ arch/sparc64/kernel/systbls.S | 4 ++-- include/asm-sparc/unistd.h | 2 +- include/asm-sparc64/unistd.h | 2 +- 5 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index c0314705d73a..768de64b371f 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -76,7 +76,7 @@ sys_call_table: /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .long sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat -/*285*/ .long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_newfstatat +/*285*/ .long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64 /*290*/ .long sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat /*295*/ .long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 9264ccbaaafa..417727bd87ba 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -428,6 +428,27 @@ asmlinkage long compat_sys_fstat64(unsigned int fd, return error; } +asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename, + struct compat_stat64 __user * statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_compat_stat64(&stat, statbuf); + +out: + return error; +} + asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index a19168510be2..c3adb7ac167d 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -77,7 +77,7 @@ sys_call_table32: /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat - .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_newfstatat + .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64 /*285*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare @@ -146,7 +146,7 @@ sys_call_table: /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_nis_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat - .word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_newfstatat + .word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64 /*285*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index 0615d601a7c6..64ec640a40ee 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -305,7 +305,7 @@ #define __NR_mknodat 286 #define __NR_fchownat 287 #define __NR_futimesat 288 -#define __NR_newfstatat 289 +#define __NR_fstatat64 289 #define __NR_unlinkat 290 #define __NR_renameat 291 #define __NR_linkat 292 diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index c58ba8a096cf..a284986b1541 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -307,7 +307,7 @@ #define __NR_mknodat 286 #define __NR_fchownat 287 #define __NR_futimesat 288 -#define __NR_newfstatat 289 +#define __NR_fstatat64 289 #define __NR_unlinkat 290 #define __NR_renameat 291 #define __NR_linkat 292 -- cgit v1.3-6-gb490 From 56f3a40a5e7586043260669cc794e56fa58339e1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 13 Feb 2006 11:39:57 +0100 Subject: [Bluetooth] Reduce L2CAP MTU for RFCOMM connections This patch reduces the default L2CAP MTU for all RFCOMM connections from 1024 to 1013 to improve the interoperability with some broken RFCOMM implementations. To make this more flexible the L2CAP MTU becomes also a module parameter and so it can changed at runtime. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/rfcomm.h | 2 +- net/bluetooth/rfcomm/core.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index bbfac86734ec..89d743cfdfdf 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -33,7 +33,7 @@ #define RFCOMM_DEFAULT_MTU 127 #define RFCOMM_DEFAULT_CREDITS 7 -#define RFCOMM_MAX_L2CAP_MTU 1024 +#define RFCOMM_MAX_L2CAP_MTU 1013 #define RFCOMM_MAX_CREDITS 40 #define RFCOMM_SKB_HEAD_RESERVE 8 diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 0d89d6434136..5b4253c61f62 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -46,13 +46,15 @@ #include #include -#define VERSION "1.6" - #ifndef CONFIG_BT_RFCOMM_DEBUG #undef BT_DBG #define BT_DBG(D...) #endif +#define VERSION "1.7" + +static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; + static struct task_struct *rfcomm_thread; static DECLARE_MUTEX(rfcomm_sem); @@ -623,7 +625,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU; + l2cap_pi(sk)->imtu = l2cap_mtu; release_sock(sk); s = rfcomm_session_add(sock, BT_BOUND); @@ -1868,7 +1870,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->imtu = RFCOMM_MAX_L2CAP_MTU; + l2cap_pi(sk)->imtu = l2cap_mtu; release_sock(sk); /* Start listening on the socket */ @@ -2070,6 +2072,9 @@ static void __exit rfcomm_exit(void) module_init(rfcomm_init); module_exit(rfcomm_exit); +module_param(l2cap_mtu, uint, 0644); +MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); + MODULE_AUTHOR("Maxim Krasnyansky , Marcel Holtmann "); MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION); MODULE_VERSION(VERSION); -- cgit v1.3-6-gb490 From 7a11c4d0635d9f6995736390b8c3346fe6f63d57 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Feb 2006 15:34:11 -0800 Subject: [IRDA]: Ratelimit messages. From: Joe Perches Based upon a patch by Dave Jones. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- include/net/irda/irda.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h index 05a840837fe7..1880e46ecc9b 100644 --- a/include/net/irda/irda.h +++ b/include/net/irda/irda.h @@ -82,9 +82,9 @@ do { if(!(expr)) { \ #define IRDA_ASSERT_LABEL(label) #endif /* CONFIG_IRDA_DEBUG */ -#define IRDA_WARNING(args...) printk(KERN_WARNING args) -#define IRDA_MESSAGE(args...) printk(KERN_INFO args) -#define IRDA_ERROR(args...) printk(KERN_ERR args) +#define IRDA_WARNING(args...) do { if (net_ratelimit()) printk(KERN_WARNING args); } while (0) +#define IRDA_MESSAGE(args...) do { if (net_ratelimit()) printk(KERN_INFO args); } while (0) +#define IRDA_ERROR(args...) do { if (net_ratelimit()) printk(KERN_ERR args); } while (0) /* * Magic numbers used by Linux-IrDA. Random numbers which must be unique to -- cgit v1.3-6-gb490 From faead26d7a06605add627f29aee73ba654ce11f9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 14 Feb 2006 10:42:07 -0600 Subject: [PATCH] add scsi_execute_in_process_context() API We have several points in the SCSI stack (primarily for our device functions) where we need to guarantee process context, but (given the place where the last reference was released) we cannot guarantee this. This API gets around the issue by executing the function directly if the caller has process context, but scheduling a workqueue to execute in process context if the caller doesn't have it. Unfortunately, it requires memory allocation in interrupt context, but it's better than what we have previously. The true solution will require a bit of re-engineering, so isn't appropriate for 2.6.16. Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ include/scsi/scsi.h | 2 ++ 2 files changed, 61 insertions(+) (limited to 'include') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 4a602853a98e..4362dcde74af 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -2248,3 +2249,61 @@ scsi_target_unblock(struct device *dev) device_for_each_child(dev, NULL, target_unblock); } EXPORT_SYMBOL_GPL(scsi_target_unblock); + + +struct work_queue_work { + struct work_struct work; + void (*fn)(void *); + void *data; +}; + +static void execute_in_process_context_work(void *data) +{ + void (*fn)(void *data); + struct work_queue_work *wqw = data; + + fn = wqw->fn; + data = wqw->data; + + kfree(wqw); + + fn(data); +} + +/** + * scsi_execute_in_process_context - reliably execute the routine with user context + * @fn: the function to execute + * @data: data to pass to the function + * + * Executes the function immediately if process context is available, + * otherwise schedules the function for delayed execution. + * + * Returns: 0 - function was executed + * 1 - function was scheduled for execution + * <0 - error + */ +int scsi_execute_in_process_context(void (*fn)(void *data), void *data) +{ + struct work_queue_work *wqw; + + if (!in_interrupt()) { + fn(data); + return 0; + } + + wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC); + + if (unlikely(!wqw)) { + printk(KERN_ERR "Failed to allocate memory\n"); + WARN_ON(1); + return -ENOMEM; + } + + INIT_WORK(&wqw->work, execute_in_process_context_work, wqw); + wqw->fn = fn; + wqw->data = data; + schedule_work(&wqw->work); + + return 1; +} +EXPORT_SYMBOL_GPL(scsi_execute_in_process_context); diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index c60b8ff2f5e4..9c331258bc27 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -433,4 +433,6 @@ struct scsi_lun { /* Used to obtain the PCI location of a device */ #define SCSI_IOCTL_GET_PCI 0x5387 +int scsi_execute_in_process_context(void (*fn)(void *data), void *data); + #endif /* _SCSI_SCSI_H */ -- cgit v1.3-6-gb490 From f32ec77b421ee15bf5a42082b60679e997c07993 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 28 Nov 2005 13:10:54 +0000 Subject: [MIPS] RM200: Give RM200 it's own timex.h. So we can get rid of config.h and the #ifdef crapola in the generic timex.h. Signed-off-by: Ralf Baechle --- include/asm-mips/mach-generic/timex.h | 11 +---------- include/asm-mips/mach-rm200/timex.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 10 deletions(-) create mode 100644 include/asm-mips/mach-rm200/timex.h (limited to 'include') diff --git a/include/asm-mips/mach-generic/timex.h b/include/asm-mips/mach-generic/timex.h index c6a2e5f0574a..48b4cfaa0d50 100644 --- a/include/asm-mips/mach-generic/timex.h +++ b/include/asm-mips/mach-generic/timex.h @@ -3,20 +3,11 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2003 by Ralf Baechle + * Copyright (C) 2003, 2005 by Ralf Baechle */ #ifndef __ASM_MACH_GENERIC_TIMEX_H #define __ASM_MACH_GENERIC_TIMEX_H -#include - -/* - * Last remaining user of the i8254 PIC, will be converted, too ... - */ -#ifdef CONFIG_SNI_RM200_PCI -#define CLOCK_TICK_RATE 1193182 -#else #define CLOCK_TICK_RATE 500000 -#endif #endif /* __ASM_MACH_GENERIC_TIMEX_H */ diff --git a/include/asm-mips/mach-rm200/timex.h b/include/asm-mips/mach-rm200/timex.h new file mode 100644 index 000000000000..11ff6cb0f214 --- /dev/null +++ b/include/asm-mips/mach-rm200/timex.h @@ -0,0 +1,13 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 2005 by Ralf Baechle + */ +#ifndef __ASM_MACH_RM200_TIMEX_H +#define __ASM_MACH_RM200_TIMEX_H + +#define CLOCK_TICK_RATE 1193182 + +#endif /* __ASM_MACH_RM200_TIMEX_H */ -- cgit v1.3-6-gb490 From 359bbd42a5a205234d5943571fc7bf946967ee59 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 9 Feb 2006 12:13:28 +0000 Subject: [MIPS] Fold non-__mips64 case into CONFIG_32BIT case. Signed-off-by: Ralf Baechle --- include/asm-mips/unistd.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index e7ff9b187783..769305d20108 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -1184,10 +1184,8 @@ type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION -# ifndef __mips64 -# define __ARCH_WANT_STAT64 -# endif # ifdef CONFIG_32BIT +# define __ARCH_WANT_STAT64 # define __ARCH_WANT_SYS_TIME # endif # ifdef CONFIG_MIPS32_O32 -- cgit v1.3-6-gb490 From 41700e73995d6c814932cb55e12525bd34be1ca5 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Fri, 10 Feb 2006 00:39:06 +0900 Subject: [MIPS] Add protected_blast_icache_range, blast_icache_range, etc. Add blast_xxx_range(), protected_blast_xxx_range() etc. for common use. They are built by __BUILD_BLAST_CACHE_RANGE(). Use protected_cache_op() macro for various protected_ routines. Output code should be logically same. Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 104 ++++++-------------------------------------- arch/mips/mm/c-tx39.c | 70 ++++------------------------- include/asm-mips/r4kcache.h | 74 +++++++++++++++++-------------- 3 files changed, 64 insertions(+), 184 deletions(-) (limited to 'include') diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index e51c38cef88e..1b71d91e8268 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -471,61 +471,29 @@ struct flush_icache_range_args { static inline void local_r4k_flush_icache_range(void *args) { struct flush_icache_range_args *fir_args = args; - unsigned long dc_lsize = cpu_dcache_line_size(); - unsigned long ic_lsize = cpu_icache_line_size(); - unsigned long sc_lsize = cpu_scache_line_size(); unsigned long start = fir_args->start; unsigned long end = fir_args->end; - unsigned long addr, aend; if (!cpu_has_ic_fills_f_dc) { if (end - start > dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_D */ - protected_writeback_dcache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } + protected_blast_dcache_range(start, end); } if (!cpu_icache_snoops_remote_store) { - if (end - start > scache_size) { + if (end - start > scache_size) r4k_blast_scache(); - } else { - addr = start & ~(sc_lsize - 1); - aend = (end - 1) & ~(sc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_SD */ - protected_writeback_scache_line(addr); - if (addr == aend) - break; - addr += sc_lsize; - } - } + else + protected_blast_scache_range(start, end); } } if (end - start > icache_size) r4k_blast_icache(); - else { - addr = start & ~(ic_lsize - 1); - aend = (end - 1) & ~(ic_lsize - 1); - while (1) { - /* Hit_Invalidate_I */ - protected_flush_icache_line(addr); - if (addr == aend) - break; - addr += ic_lsize; - } - } + else + protected_blast_icache_range(start, end); } static void r4k_flush_icache_range(unsigned long start, unsigned long end) @@ -619,27 +587,14 @@ static void r4k_flush_icache_page(struct vm_area_struct *vma, static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - /* Catch bad driver code */ BUG_ON(size == 0); if (cpu_has_subset_pcaches) { - unsigned long sc_lsize = cpu_scache_line_size(); - - if (size >= scache_size) { + if (size >= scache_size) r4k_blast_scache(); - return; - } - - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - flush_scache_line(a); /* Hit_Writeback_Inv_SD */ - if (a == end) - break; - a += sc_lsize; - } + else + blast_scache_range(addr, addr + size); return; } @@ -651,17 +606,8 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) if (size >= dcache_size) { r4k_blast_dcache(); } else { - unsigned long dc_lsize = cpu_dcache_line_size(); - R4600_HIT_CACHEOP_WAR_IMPL; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) - break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } bc_wback_inv(addr, size); @@ -669,44 +615,22 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - /* Catch bad driver code */ BUG_ON(size == 0); if (cpu_has_subset_pcaches) { - unsigned long sc_lsize = cpu_scache_line_size(); - - if (size >= scache_size) { + if (size >= scache_size) r4k_blast_scache(); - return; - } - - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - flush_scache_line(a); /* Hit_Writeback_Inv_SD */ - if (a == end) - break; - a += sc_lsize; - } + else + blast_scache_range(addr, addr + size); return; } if (size >= dcache_size) { r4k_blast_dcache(); } else { - unsigned long dc_lsize = cpu_dcache_line_size(); - R4600_HIT_CACHEOP_WAR_IMPL; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) - break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } bc_inv(addr, size); diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index 0a97a9434eba..7c572bea4a98 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -44,8 +44,6 @@ __asm__ __volatile__( \ /* TX39H-style cache flush routines. */ static void tx39h_flush_icache_all(void) { - unsigned long start = KSEG0; - unsigned long end = (start + icache_size); unsigned long flags, config; /* disable icache (set ICE#) */ @@ -53,33 +51,18 @@ static void tx39h_flush_icache_all(void) config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); - - /* invalidate icache */ - while (start < end) { - cache16_unroll32(start, Index_Invalidate_I); - start += 0x200; - } - + blast_icache16(); write_c0_conf(config); local_irq_restore(flags); } static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - /* Catch bad driver code */ BUG_ON(size == 0); iob(); - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - invalidate_dcache_line(a); /* Hit_Invalidate_D */ - if (a == end) break; - a += dc_lsize; - } + blast_inv_dcache_range(addr, addr + size); } @@ -241,42 +224,21 @@ static void tx39_flush_data_cache_page(unsigned long addr) static void tx39_flush_icache_range(unsigned long start, unsigned long end) { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - unsigned long addr, aend; - if (end - start > dcache_size) tx39_blast_dcache(); - else { - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_D */ - protected_writeback_dcache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } - } + else + protected_blast_dcache_range(start, end); if (end - start > icache_size) tx39_blast_icache(); else { unsigned long flags, config; - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); /* disable icache (set ICE#) */ local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); - while (1) { - /* Hit_Invalidate_I */ - protected_flush_icache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } + protected_blast_icache_range(start, end); write_c0_conf(config); local_irq_restore(flags); } @@ -311,7 +273,7 @@ static void tx39_flush_icache_page(struct vm_area_struct *vma, struct page *page static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; + unsigned long end; if (((size | addr) & (PAGE_SIZE - 1)) == 0) { end = addr + size; @@ -322,20 +284,13 @@ static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) } else if (size > dcache_size) { tx39_blast_dcache(); } else { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } } static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; + unsigned long end; if (((size | addr) & (PAGE_SIZE - 1)) == 0) { end = addr + size; @@ -346,14 +301,7 @@ static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) } else if (size > dcache_size) { tx39_blast_dcache(); } else { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - invalidate_dcache_line(a); /* Hit_Invalidate_D */ - if (a == end) break; - a += dc_lsize; - } + blast_inv_dcache_range(addr, addr + size); } } diff --git a/include/asm-mips/r4kcache.h b/include/asm-mips/r4kcache.h index cc53196efa40..9632c27dad15 100644 --- a/include/asm-mips/r4kcache.h +++ b/include/asm-mips/r4kcache.h @@ -14,6 +14,7 @@ #include #include +#include /* * This macro return a properly sign-extended address suitable as base address @@ -78,22 +79,25 @@ static inline void flush_scache_line(unsigned long addr) cache_op(Hit_Writeback_Inv_SD, addr); } +#define protected_cache_op(op,addr) \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set noreorder \n" \ + " .set mips3 \n" \ + "1: cache %0, (%1) \n" \ + "2: .set pop \n" \ + " .section __ex_table,\"a\" \n" \ + " "STR(PTR)" 1b, 2b \n" \ + " .previous" \ + : \ + : "i" (op), "r" (addr)) + /* * The next two are for badland addresses like signal trampolines. */ static inline void protected_flush_icache_line(unsigned long addr) { - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " .set mips3 \n" - "1: cache %0, (%1) \n" - "2: .set pop \n" - " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 2b \n" - " .previous" - : - : "i" (Hit_Invalidate_I), "r" (addr)); + protected_cache_op(Hit_Invalidate_I, addr); } /* @@ -104,32 +108,12 @@ static inline void protected_flush_icache_line(unsigned long addr) */ static inline void protected_writeback_dcache_line(unsigned long addr) { - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " .set mips3 \n" - "1: cache %0, (%1) \n" - "2: .set pop \n" - " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 2b \n" - " .previous" - : - : "i" (Hit_Writeback_Inv_D), "r" (addr)); + protected_cache_op(Hit_Writeback_Inv_D, addr); } static inline void protected_writeback_scache_line(unsigned long addr) { - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " .set mips3 \n" - "1: cache %0, (%1) \n" - "2: .set pop \n" - " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 2b \n" - " .previous" - : - : "i" (Hit_Writeback_Inv_SD), "r" (addr)); + protected_cache_op(Hit_Writeback_Inv_SD, addr); } /* @@ -295,4 +279,28 @@ __BUILD_BLAST_CACHE(i, icache, Index_Invalidate_I, Hit_Invalidate_I, 64) __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) __BUILD_BLAST_CACHE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) +/* build blast_xxx_range, protected_blast_xxx_range */ +#define __BUILD_BLAST_CACHE_RANGE(pfx, desc, hitop, prot) \ +static inline void prot##blast_##pfx##cache##_range(unsigned long start, \ + unsigned long end) \ +{ \ + unsigned long lsize = cpu_##desc##_line_size(); \ + unsigned long addr = start & ~(lsize - 1); \ + unsigned long aend = (end - 1) & ~(lsize - 1); \ + while (1) { \ + prot##cache_op(hitop, addr); \ + if (addr == aend) \ + break; \ + addr += lsize; \ + } \ +} + +__BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, protected_) +__BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, protected_) +__BUILD_BLAST_CACHE_RANGE(i, icache, Hit_Invalidate_I, protected_) +__BUILD_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D, ) +__BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, ) +/* blast_inv_dcache_range */ +__BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, ) + #endif /* _ASM_R4KCACHE_H */ -- cgit v1.3-6-gb490 From 3218357c94af92478ef39163163a81e654385320 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 10 Feb 2006 01:31:24 +0000 Subject: [MIPS] More uaccess.h fixes with gcc >= 4.0.1. From Richard Sandiford : This patch caused a miscompilation of the restore_gp_regs() block in restore_sigcontext(). This was in a 32-bit kernel compiled with GCC CVS head. restore_gp_regs() copies 64-bit user fields into 32-bit variables, and in this combination, the new __get_user_asm_ll32() clobbers too many registers. It says: /* * Get a long long 64 using 32 bit registers. */ { \ __asm__ __volatile__( \ "1: lw %1, (%3) \n" \ "2: lw %D1, 4(%3) \n" \ " move %0, $0 \n" \ "3: .section .fixup,\"ax\" \n" \ "4: li %0, %4 \n" \ " move %1, $0 \n" \ " move %D1, $0 \n" \ " j 3b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ " " __UA_ADDR " 1b, 4b \n" \ " " __UA_ADDR " 2b, 4b \n" \ " .previous \n" \ : "=r" (__gu_err), "=&r" (val) \ : "0" (0), "r" (addr), "i" (-EFAULT)); \ } and this requires val (%1) to be a 64-bit value. In the case I saw, gcc was using $3 for the 32-bit val, and wasn't expecting $4 to be clobbered. Signed-off-by: Ralf Baechle --- include/asm-mips/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mips/uaccess.h b/include/asm-mips/uaccess.h index 91d813a37823..7a553e9d44d3 100644 --- a/include/asm-mips/uaccess.h +++ b/include/asm-mips/uaccess.h @@ -266,6 +266,8 @@ do { \ */ #define __get_user_asm_ll32(val, addr) \ { \ + unsigned long long __gu_tmp; \ + \ __asm__ __volatile__( \ "1: lw %1, (%3) \n" \ "2: lw %D1, 4(%3) \n" \ @@ -280,8 +282,9 @@ do { \ " " __UA_ADDR " 1b, 4b \n" \ " " __UA_ADDR " 2b, 4b \n" \ " .previous \n" \ - : "=r" (__gu_err), "=&r" (val) \ + : "=r" (__gu_err), "=&r" (__gu_tmp) \ : "0" (0), "r" (addr), "i" (-EFAULT)); \ + (val) = __gu_tmp; \ } /* -- cgit v1.3-6-gb490 From fbb6b3a4ac0ccf12a97c98881d9d873d6dc26fe5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 10 Feb 2006 14:13:08 +0000 Subject: [MIPS] Get rid of kludgery needed to keep stdargs of old compilers working. Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 1 - include/asm-mips/gcc/sgidefs.h | 17 ----------------- 2 files changed, 18 deletions(-) delete mode 100644 include/asm-mips/gcc/sgidefs.h (limited to 'include') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 6a57407df1bc..38c0f3360d51 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -94,7 +94,6 @@ endif # machines may also. Since BFD is incredibly buggy with respect to # crossformat linking we rely on the elf2ecoff tool for format conversion. # -cflags-y += -I $(TOPDIR)/include/asm/gcc cflags-y += -G 0 -mno-abicalls -fno-pic -pipe LDFLAGS_vmlinux += -G 0 -static -n -nostdlib MODFLAGS += -mlong-calls diff --git a/include/asm-mips/gcc/sgidefs.h b/include/asm-mips/gcc/sgidefs.h deleted file mode 100644 index 05994371a2af..000000000000 --- a/include/asm-mips/gcc/sgidefs.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * include/sgidefs.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1996 by Ralf Baechle - * - * This file is here to satisfy GCC's expectations. - */ -#ifndef __SGIDEFS_H -#define __SGIDEFS_H - -#include - -#endif /* __SGIDEFS_H */ -- cgit v1.3-6-gb490 From 9cf8ff96447f995d5ea18ec9f25dc8dae26501a2 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 13 Feb 2006 09:15:49 +0000 Subject: [MIPS] Fix CPU type bitmasks for MIPS III, IV and V. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ralf Baechle --- include/asm-mips/cpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-mips/cpu.h b/include/asm-mips/cpu.h index 934e063e79f1..818b9a97e214 100644 --- a/include/asm-mips/cpu.h +++ b/include/asm-mips/cpu.h @@ -204,9 +204,9 @@ */ #define MIPS_CPU_ISA_I 0x00000001 #define MIPS_CPU_ISA_II 0x00000002 -#define MIPS_CPU_ISA_III 0x00000003 -#define MIPS_CPU_ISA_IV 0x00000004 -#define MIPS_CPU_ISA_V 0x00000005 +#define MIPS_CPU_ISA_III 0x00000004 +#define MIPS_CPU_ISA_IV 0x00000008 +#define MIPS_CPU_ISA_V 0x00000010 #define MIPS_CPU_ISA_M32R1 0x00000020 #define MIPS_CPU_ISA_M32R2 0x00000040 #define MIPS_CPU_ISA_M64R1 0x00000080 -- cgit v1.3-6-gb490 From a6b14fa6fdc01ab3519c2729624f808677539b59 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 14 Feb 2006 15:01:12 -0800 Subject: [IA64] Count disabled cpus as potential hot-pluggable CPUs Have a facility to account for potentially hot-pluggable CPUs. ACPI doesnt give a determinstic method to find hot-pluggable CPUs. Hence we use 2 methods to assist. - BIOS can mark potentially hot-pluggable CPUs as disabled in the MADT tables. - User can specify the number of hot-pluggable CPUs via parameter additional_cpus=X The option is enabled only if ACPI_CONFIG_HOTPLUG_CPU=y which enables the physical hotplug option. Without which user can still use logical onlining and offlining of CPUs by enabling CONFIG_HOTPLUG_CPU=y Adds more bits to cpu_possible_map for potentially hot-pluggable cpus. Signed-off-by: Ashok Raj Signed-off-by: Tony Luck --- arch/ia64/kernel/acpi.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/ia64/kernel/setup.c | 4 ++++ include/asm-ia64/acpi.h | 2 ++ 3 files changed, 62 insertions(+) (limited to 'include') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index d2702c419cf8..34795ede72e0 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -761,6 +761,62 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) return (0); } +int additional_cpus __initdata = -1; + +static __init int setup_additional_cpus(char *s) +{ + if (s) + additional_cpus = simple_strtol(s, NULL, 0); + + return 0; +} + +early_param("additional_cpus", setup_additional_cpus); + +/* + * cpu_possible_map should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_map on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with additional_cpus=NUM + * - Otherwise don't reserve additional CPUs. + */ +__init void prefill_possible_map(void) +{ + int i; + int possible, disabled_cpus; + + disabled_cpus = total_cpus - available_cpus; + if (additional_cpus == -1) { + if (disabled_cpus > 0) { + possible = total_cpus; + additional_cpus = disabled_cpus; + } + else { + possible = available_cpus; + additional_cpus = 0; + } + } else { + possible = available_cpus + additional_cpus; + } + if (possible > NR_CPUS) + possible = NR_CPUS; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, + max_t(int, additional_cpus, 0)); + + for (i = 0; i < possible; i++) + cpu_set(i, cpu_possible_map); +} + int acpi_map_lsapic(acpi_handle handle, int *pcpu) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 35f7835294a3..3258e09278d0 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -430,6 +430,7 @@ setup_arch (char **cmdline_p) if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); + parse_early_param(); #ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); @@ -688,6 +689,9 @@ void setup_per_cpu_areas (void) { /* start_kernel() requires this... */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif } /* diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h index 3a544ffc5008..f7a517654308 100644 --- a/include/asm-ia64/acpi.h +++ b/include/asm-ia64/acpi.h @@ -106,6 +106,8 @@ extern unsigned int can_cpei_retarget(void); extern unsigned int is_cpu_cpei_target(unsigned int cpu); extern void set_cpei_target_cpu(unsigned int cpu); extern unsigned int get_cpei_target_cpu(void); +extern void prefill_possible_map(void); +extern int additional_cpus; #ifdef CONFIG_ACPI_NUMA /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/ -- cgit v1.3-6-gb490 From 7c8903f6373f9abecf060bad53ca36bc4ac037f2 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 14 Feb 2006 13:53:03 -0800 Subject: [PATCH] jbd: revert checkpoint list changes This patch reverts commit f93ea411b73594f7d144855fd34278bcf34a9afc: [PATCH] jbd: split checkpoint lists This broke journal_flush() for OCFS2, which is its method of being sure that metadata is sent to disk for another node. And two related commits 8d3c7fce2d20ecc3264c8d8c91ae3beacdeaed1b and 43c3e6f5abdf6acac9b90c86bf03f995bf7d3d92 with the subjects: [PATCH] jbd: log_do_checkpoint fix [PATCH] jbd: remove_transaction fix These seem to be incremental bugfixes on the original patch and as such are no longer needed. Signed-off-by: Mark Fasheh Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/checkpoint.c | 418 ++++++++++++++++++++++------------------------------ fs/jbd/commit.c | 3 +- include/linux/jbd.h | 8 +- 3 files changed, 179 insertions(+), 250 deletions(-) (limited to 'include') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index e6265a0b56b8..543ed543d1e5 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -24,75 +24,29 @@ #include /* - * Unlink a buffer from a transaction checkpoint list. + * Unlink a buffer from a transaction. * * Called with j_list_lock held. */ -static void __buffer_unlink_first(struct journal_head *jh) +static inline void __buffer_unlink(struct journal_head *jh) { transaction_t *transaction; transaction = jh->b_cp_transaction; + jh->b_cp_transaction = NULL; jh->b_cpnext->b_cpprev = jh->b_cpprev; jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) { + if (transaction->t_checkpoint_list == jh) transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; - } -} - -/* - * Unlink a buffer from a transaction checkpoint(io) list. - * - * Called with j_list_lock held. - */ - -static inline void __buffer_unlink(struct journal_head *jh) -{ - transaction_t *transaction; - - transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - if (transaction->t_checkpoint_io_list == jh) { - transaction->t_checkpoint_io_list = jh->b_cpnext; - if (transaction->t_checkpoint_io_list == jh) - transaction->t_checkpoint_io_list = NULL; - } -} - -/* - * Move a buffer from the checkpoint list to the checkpoint io list - * - * Called with j_list_lock held - */ - -static inline void __buffer_relink_io(struct journal_head *jh) -{ - transaction_t *transaction; - - transaction = jh->b_cp_transaction; - __buffer_unlink_first(jh); - - if (!transaction->t_checkpoint_io_list) { - jh->b_cpnext = jh->b_cpprev = jh; - } else { - jh->b_cpnext = transaction->t_checkpoint_io_list; - jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; - jh->b_cpprev->b_cpnext = jh; - jh->b_cpnext->b_cpprev = jh; - } - transaction->t_checkpoint_io_list = jh; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; } /* * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it and 2 if we also released the - * whole transaction. - * + * Returns 1 if we released it. * Requires j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ @@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - ret = __journal_remove_checkpoint(jh) + 1; + __journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); + ret = 1; } else { jbd_unlock_bh_state(bh); } @@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) } /* - * Clean up transaction's list of buffers submitted for io. - * We wait for any pending IO to complete and remove any clean - * buffers. Note that we take the buffers in the opposite ordering - * from the one in which they were submitted for IO. + * Clean up a transaction's checkpoint list. + * + * We wait for any pending IO to complete and make sure any clean + * buffers are removed from the transaction. + * + * Return 1 if we performed any actions which might have destroyed the + * checkpoint. (journal_remove_checkpoint() deletes the transaction when + * the last checkpoint buffer is cleansed) * * Called with j_list_lock held. */ - -static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) { - struct journal_head *jh; + struct journal_head *jh, *next_jh, *last_jh; struct buffer_head *bh; - tid_t this_tid; - int released = 0; - - this_tid = transaction->t_tid; -restart: - /* Didn't somebody clean up the transaction in the meanwhile */ - if (journal->j_checkpoint_transactions != transaction || - transaction->t_tid != this_tid) - return; - while (!released && transaction->t_checkpoint_io_list) { - jh = transaction->t_checkpoint_io_list; + int ret = 0; + + assert_spin_locked(&journal->j_list_lock); + jh = transaction->t_checkpoint_list; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + next_jh = jh; + do { + jh = next_jh; bh = jh2bh(jh); - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); - goto restart; - } if (buffer_locked(bh)) { atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); - spin_lock(&journal->j_list_lock); - goto restart; + goto out_return_1; } + /* - * Now in whatever state the buffer currently is, we know that - * it has been written out and so we can drop it from the list + * This is foul */ - released = __journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - } + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + goto out_return_1; + } + + if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); + log_wait_commit(journal, tid); + goto out_return_1; + } + + /* + * AKPM: I think the buffer_jbddirty test is redundant - it + * shouldn't have NULL b_transaction? + */ + next_jh = jh->b_cpnext; + if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { + BUFFER_TRACE(bh, "remove from checkpoint"); + __journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + __brelse(bh); + ret = 1; + } else { + jbd_unlock_bh_state(bh); + } + } while (jh != last_jh); + + return ret; +out_return_1: + spin_lock(&journal->j_list_lock); + return 1; } #define NR_BATCH 64 @@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; + spin_unlock(&journal->j_list_lock); ll_rw_block(SWRITE, *batch_count, bhs); + spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); @@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. * - * Called with j_list_lock held and drops it if 1 is returned + * Called with j_list_lock held. * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ -static int __process_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count) +static int __flush_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count, + int *drop_count) { struct buffer_head *bh = jh2bh(jh); int ret = 0; - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - put_bh(bh); - ret = 1; - } - else if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; + if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { + J_ASSERT_JH(jh, jh->b_transaction == NULL); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); - ret = 1; - } - else if (!buffer_dirty(bh)) { - J_ASSERT_JH(jh, !buffer_jbddirty(bh)); - BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - put_bh(bh); - ret = 1; - } - else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. @@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); bhs[*batch_count] = bh; - __buffer_relink_io(jh); jbd_unlock_bh_state(bh); (*batch_count)++; if (*batch_count == NR_BATCH) { - spin_unlock(&journal->j_list_lock); __flush_batch(journal, bhs, batch_count); ret = 1; } + } else { + int last_buffer = 0; + if (jh->b_cpnext == jh) { + /* We may be about to drop the transaction. Tell the + * caller that the lists have changed. + */ + last_buffer = 1; + } + if (__try_to_free_cp_buf(jh)) { + (*drop_count)++; + ret = last_buffer; + } } return ret; } /* - * Perform an actual checkpoint. We take the first transaction on the - * list of transactions to be checkpointed and send all its buffers - * to disk. We submit larger chunks of data at once. + * Perform an actual checkpoint. We don't write out only enough to + * satisfy the current blocked requests: rather we submit a reasonably + * sized chunk of the outstanding data to disk at once for + * efficiency. __log_wait_for_space() will retry if we didn't free enough. * + * However, we _do_ take into account the amount requested so that once + * the IO has been queued, we can return as soon as enough of it has + * completed to disk. + * * The journal should be locked before calling this function. */ int log_do_checkpoint(journal_t *journal) { - transaction_t *transaction; - tid_t this_tid; int result; + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; jbd_debug(1, "Start checkpoint\n"); @@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal) return result; /* - * OK, we need to start writing disk blocks. Take one transaction - * and write it. + * OK, we need to start writing disk blocks. Try to free up a + * quarter of the log in a single checkpoint if we can. */ - spin_lock(&journal->j_list_lock); - if (!journal->j_checkpoint_transactions) - goto out; - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; -restart: /* - * If someone cleaned up this transaction while we slept, we're - * done (maybe it's a new transaction, but it fell at the same - * address). + * AKPM: check this code. I had a feeling a while back that it + * degenerates into a busy loop at unmount time. */ - if (journal->j_checkpoint_transactions == transaction && - transaction->t_tid == this_tid) { - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; - struct journal_head *jh; - int retry = 0; - - while (!retry && transaction->t_checkpoint_list) { + spin_lock(&journal->j_list_lock); + while (journal->j_checkpoint_transactions) { + transaction_t *transaction; + struct journal_head *jh, *last_jh, *next_jh; + int drop_count = 0; + int cleanup_ret, retry = 0; + tid_t this_tid; + + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; + jh = transaction->t_checkpoint_list; + last_jh = jh->b_cpprev; + next_jh = jh; + do { struct buffer_head *bh; - jh = transaction->t_checkpoint_list; + jh = next_jh; + next_jh = jh->b_cpnext; bh = jh2bh(jh); if (!jbd_trylock_bh_state(bh)) { jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); retry = 1; break; } - retry = __process_buffer(journal, jh, bhs, - &batch_count); - if (!retry && - lock_need_resched(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); + retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); + if (cond_resched_lock(&journal->j_list_lock)) { retry = 1; break; } - } + } while (jh != last_jh && !retry); if (batch_count) { - if (!retry) { - spin_unlock(&journal->j_list_lock); - retry = 1; - } __flush_batch(journal, bhs, &batch_count); + retry = 1; } - if (retry) { - spin_lock(&journal->j_list_lock); - goto restart; - } /* - * Now we have cleaned up the first transaction's checkpoint - * list. Let's clean up the second one. + * If someone cleaned up this transaction while we slept, we're + * done + */ + if (journal->j_checkpoint_transactions != transaction) + break; + if (retry) + continue; + /* + * Maybe it's a new transaction, but it fell at the same + * address */ - __wait_cp_io(journal, transaction); + if (transaction->t_tid != this_tid) + continue; + /* + * We have walked the whole transaction list without + * finding anything to write to disk. We had better be + * able to make some progress or we are in trouble. + */ + cleanup_ret = __cleanup_transaction(journal, transaction); + J_ASSERT(drop_count != 0 || cleanup_ret != 0); + if (journal->j_checkpoint_transactions != transaction) + break; } -out: spin_unlock(&journal->j_list_lock); result = cleanup_journal_tail(journal); if (result < 0) return result; + return 0; } @@ -471,53 +455,6 @@ int cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ -/* - * journal_clean_one_cp_list - * - * Find all the written-back checkpoint buffers in the given list and release them. - * - * Called with the journal locked. - * Called with j_list_lock held. - * Returns number of bufers reaped (for debug) - */ - -static int journal_clean_one_cp_list(struct journal_head *jh, int *released) -{ - struct journal_head *last_jh; - struct journal_head *next_jh = jh; - int ret, freed = 0; - - *released = 0; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranking */ - if (jbd_trylock_bh_state(jh2bh(jh))) { - ret = __try_to_free_cp_buf(jh); - if (ret) { - freed++; - if (ret == 2) { - *released = 1; - return freed; - } - } - } - /* - * This function only frees up some memory if possible so we - * dont have an obligation to finish processing. Bail out if - * preemption requested: - */ - if (need_resched()) - return freed; - } while (jh != last_jh); - - return freed; -} - /* * journal_clean_checkpoint_list * @@ -525,38 +462,46 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) * * Called with the journal locked. * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) + * Returns number of bufers reaped (for debug) */ int __journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0, released; + int ret = 0; transaction = journal->j_checkpoint_transactions; - if (!transaction) + if (transaction == 0) goto out; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { + struct journal_head *jh; + transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_list, &released); - if (need_resched()) - goto out; - if (released) - continue; - /* - * It is essential that we are as careful as in the case of - * t_checkpoint_list with removing the buffer from the list as - * we can possibly see not yet submitted buffers on io_list - */ - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, &released); - if (need_resched()) - goto out; + jh = transaction->t_checkpoint_list; + if (jh) { + struct journal_head *last_jh = jh->b_cpprev; + struct journal_head *next_jh = jh; + + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranknig */ + if (jbd_trylock_bh_state(jh2bh(jh))) + ret += __try_to_free_cp_buf(jh); + /* + * This function only frees up some memory + * if possible so we dont have an obligation + * to finish processing. Bail out if preemption + * requested: + */ + if (need_resched()) + goto out; + } while (jh != last_jh); + } } while (transaction != last_transaction); out: return ret; @@ -571,22 +516,18 @@ out: * buffer updates committed in that transaction have safely been stored * elsewhere on disk. To achieve this, all of the buffers in a * transaction need to be maintained on the transaction's checkpoint - * lists until they have been rewritten, at which point this function is + * list until they have been rewritten, at which point this function is * called to remove the buffer from the existing transaction's - * checkpoint lists. - * - * The function returns 1 if it frees the transaction, 0 otherwise. + * checkpoint list. * * This function is called with the journal locked. * This function is called with j_list_lock held. - * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ -int __journal_remove_checkpoint(struct journal_head *jh) +void __journal_remove_checkpoint(struct journal_head *jh) { transaction_t *transaction; journal_t *journal; - int ret = 0; JBUFFER_TRACE(jh, "entry"); @@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) journal = transaction->t_journal; __buffer_unlink(jh); - jh->b_cp_transaction = NULL; - if (transaction->t_checkpoint_list != NULL || - transaction->t_checkpoint_io_list != NULL) + if (transaction->t_checkpoint_list != NULL) goto out; JBUFFER_TRACE(jh, "transaction has no more buffers"); @@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) /* Just in case anybody was waiting for more transactions to be checkpointed... */ wake_up(&journal->j_wait_logspace); - ret = 1; out: JBUFFER_TRACE(jh, "exit"); - return ret; } /* @@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); - J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(transaction->t_updates == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 29e62d98bae6..002ad2bbc769 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -829,8 +829,7 @@ restart_loop: journal->j_committing_transaction = NULL; spin_unlock(&journal->j_state_lock); - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { + if (commit_transaction->t_checkpoint_list == NULL) { __journal_drop_transaction(journal, commit_transaction); } else { if (journal->j_checkpoint_transactions == NULL) { diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 0fe4aa891ddc..41ee79962bb2 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -497,12 +497,6 @@ struct transaction_s */ struct journal_head *t_checkpoint_list; - /* - * Doubly-linked circular list of all buffers submitted for IO while - * checkpointing. [j_list_lock] - */ - struct journal_head *t_checkpoint_io_list; - /* * Doubly-linked circular list of temporary buffers currently undergoing * IO in the log [j_list_lock] @@ -852,7 +846,7 @@ extern void journal_commit_transaction(journal_t *); /* Checkpoint list management */ int __journal_clean_checkpoint_list(journal_t *journal); -int __journal_remove_checkpoint(struct journal_head *); +void __journal_remove_checkpoint(struct journal_head *); void __journal_insert_checkpoint(struct journal_head *, transaction_t *); /* Buffer IO */ -- cgit v1.3-6-gb490 From 5ac5f9d1ce8492163dbde5d357dc5d03becf7e36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Feb 2006 13:53:04 -0800 Subject: [PATCH] NLM: Fix the NLM_GRANTED callback checks If 2 threads attached to the same process are blocking on different locks on different files (maybe even on different servers) but have the same lock arguments (i.e. same offset+length - actually quite common, since most processes try to lock the entire file) then the first GRANTED call that wakes one up will also wake the other. Currently when the NLM_GRANTED callback comes in, lockd walks the list of blocked locks in search of a match to the lock that the NLM server has granted. Although it checks the lock pid, start and end, it fails to check the filehandle and the server address. By checking the filehandle and server IP address, we ensure that this only happens if the locks truly are referencing the same file. Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/lockd/clntlock.c | 27 +++++++++++++++++---------- fs/lockd/svc4proc.c | 2 +- fs/lockd/svcproc.c | 2 +- include/linux/lockd/lockd.h | 6 +++--- 4 files changed, 22 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 3eaf6e701087..da6354baa0b8 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout) /* * The server lockd has called us back to tell us the lock was granted */ -u32 -nlmclnt_grant(struct nlm_lock *lock) +u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) { + const struct file_lock *fl = &lock->fl; + const struct nfs_fh *fh = &lock->fh; struct nlm_wait *block; u32 res = nlm_lck_denied; @@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock) * Warning: must not use cookie to match it! */ list_for_each_entry(block, &nlm_blocked, b_list) { - if (nlm_compare_locks(block->b_lock, &lock->fl)) { - /* Alright, we found a lock. Set the return status - * and wake up the caller - */ - block->b_status = NLM_LCK_GRANTED; - wake_up(&block->b_wait); - res = nlm_granted; - } + struct file_lock *fl_blocked = block->b_lock; + + if (!nlm_compare_locks(fl_blocked, fl)) + continue; + if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) + continue; + if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0) + continue; + /* Alright, we found a lock. Set the return status + * and wake up the caller + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + res = nlm_granted; } return res; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4063095d849e..b10f913aa06a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(&argp->lock); + resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3bc437e0cf5b..35681d9cf1fc 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(&argp->lock); + resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 920766cea79c..ef21ed296039 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -149,7 +149,7 @@ struct nlm_rqst * nlmclnt_alloc_call(void); int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_rqst *req); long nlmclnt_block(struct nlm_rqst *req, long timeout); -u32 nlmclnt_grant(struct nlm_lock *); +u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *); @@ -204,7 +204,7 @@ nlmsvc_file_inode(struct nlm_file *file) * Compare two host addresses (needs modifying for ipv6) */ static __inline__ int -nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2) +nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2) { return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } @@ -214,7 +214,7 @@ nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2) * When the second lock is of type F_UNLCK, this acts like a wildcard. */ static __inline__ int -nlm_compare_locks(struct file_lock *fl1, struct file_lock *fl2) +nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { return fl1->fl_pid == fl2->fl_pid && fl1->fl_start == fl2->fl_start -- cgit v1.3-6-gb490 From 8b09fb34513225d87d511c7e8f29c0fd3cf860e0 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 14 Feb 2006 13:53:05 -0800 Subject: [PATCH] fix x86 topology export in sysfs for subarchitectures The correct way to export hyperthreading based functions is to predicate them on CONFIG_X86_HT. Without this, the topology exporting patch breaks the build on all non-PC x86 subarchitectures. Signed-off-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index af503a122b23..aa958c6ee83e 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -27,7 +27,7 @@ #ifndef _ASM_I386_TOPOLOGY_H #define _ASM_I386_TOPOLOGY_H -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT #define topology_physical_package_id(cpu) \ (phys_proc_id[cpu] == BAD_APICID ? -1 : phys_proc_id[cpu]) #define topology_core_id(cpu) \ -- cgit v1.3-6-gb490 From f822566165dd46ff5de9bf895cfa6c51f53bb0c4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 14 Feb 2006 13:53:08 -0800 Subject: [PATCH] madvise MADV_DONTFORK/MADV_DOFORK Currently, copy-on-write may change the physical address of a page even if the user requested that the page is pinned in memory (either by mlock or by get_user_pages). This happens if the process forks meanwhile, and the parent writes to that page. As a result, the page is orphaned: in case of get_user_pages, the application will never see any data hardware DMA's into this page after the COW. In case of mlock'd memory, the parent is not getting the realtime/security benefits of mlock. In particular, this affects the Infiniband modules which do DMA from and into user pages all the time. This patch adds madvise options to control whether memory range is inherited across fork. Useful e.g. for when hardware is doing DMA from/into these pages. Could also be useful to an application wanting to speed up its forks by cutting large areas out of consideration. Signed-off-by: Michael S. Tsirkin Acked-by: Hugh Dickins Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/mman.h | 2 ++ include/asm-arm/mman.h | 2 ++ include/asm-arm26/mman.h | 2 ++ include/asm-cris/mman.h | 2 ++ include/asm-frv/mman.h | 2 ++ include/asm-h8300/mman.h | 2 ++ include/asm-i386/mman.h | 2 ++ include/asm-ia64/mman.h | 2 ++ include/asm-m32r/mman.h | 2 ++ include/asm-m68k/mman.h | 2 ++ include/asm-mips/mman.h | 2 ++ include/asm-parisc/mman.h | 2 ++ include/asm-powerpc/mman.h | 2 ++ include/asm-s390/mman.h | 2 ++ include/asm-sh/mman.h | 2 ++ include/asm-sparc/mman.h | 2 ++ include/asm-sparc64/mman.h | 2 ++ include/asm-v850/mman.h | 2 ++ include/asm-x86_64/mman.h | 2 ++ include/asm-xtensa/mman.h | 2 ++ mm/madvise.c | 21 +++++++++++++++++---- 21 files changed, 57 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h index f6439532a262..a21515c16a43 100644 --- a/include/asm-alpha/mman.h +++ b/include/asm-alpha/mman.h @@ -43,6 +43,8 @@ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages */ #define MADV_REMOVE 7 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h index f0bebca2ac21..693ed859e632 100644 --- a/include/asm-arm/mman.h +++ b/include/asm-arm/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h index 0ed7780541fa..2096c50df888 100644 --- a/include/asm-arm26/mman.h +++ b/include/asm-arm26/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h index 5a382b8bf3f7..deddfb239ff5 100644 --- a/include/asm-cris/mman.h +++ b/include/asm-cris/mman.h @@ -38,6 +38,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h index 8af4a41c255e..d3bca306da82 100644 --- a/include/asm-frv/mman.h +++ b/include/asm-frv/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h index 744a8fb485c2..ac0346f7d11d 100644 --- a/include/asm-h8300/mman.h +++ b/include/asm-h8300/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h index ba4941e6f643..ab2339a1d807 100644 --- a/include/asm-i386/mman.h +++ b/include/asm-i386/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h index 828beb24a20e..357ebb780cc0 100644 --- a/include/asm-ia64/mman.h +++ b/include/asm-ia64/mman.h @@ -44,6 +44,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h index 12e29747bc84..6b02fe3fcff2 100644 --- a/include/asm-m32r/mman.h +++ b/include/asm-m32r/mman.h @@ -38,6 +38,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h index ea262ab88b3b..efd12bc4ccb7 100644 --- a/include/asm-m68k/mman.h +++ b/include/asm-m68k/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h index dd17c8bd62a1..6d01e26830fa 100644 --- a/include/asm-mips/mman.h +++ b/include/asm-mips/mman.h @@ -66,6 +66,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h index 736b0abcac05..a381cf5c8f55 100644 --- a/include/asm-parisc/mman.h +++ b/include/asm-parisc/mman.h @@ -49,6 +49,8 @@ #define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */ #define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ #define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h index a2e34c21b44f..fcff25d13f13 100644 --- a/include/asm-powerpc/mman.h +++ b/include/asm-powerpc/mman.h @@ -45,6 +45,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h index c8d5409b5d56..d41ca1477010 100644 --- a/include/asm-s390/mman.h +++ b/include/asm-s390/mman.h @@ -44,6 +44,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h index 693bd55a3710..0e08d0573abc 100644 --- a/include/asm-sh/mman.h +++ b/include/asm-sh/mman.h @@ -36,6 +36,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h index 98435ad8619e..4a298b2be859 100644 --- a/include/asm-sparc/mman.h +++ b/include/asm-sparc/mman.h @@ -55,6 +55,8 @@ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ #define MADV_REMOVE 0x6 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h index cb4b6156194d..d705ec92da8b 100644 --- a/include/asm-sparc64/mman.h +++ b/include/asm-sparc64/mman.h @@ -55,6 +55,8 @@ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ #define MADV_REMOVE 0x6 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h index edc79965193a..7b851c310e41 100644 --- a/include/asm-v850/mman.h +++ b/include/asm-v850/mman.h @@ -33,6 +33,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h index d0e97b74f735..b699a38c1c3c 100644 --- a/include/asm-x86_64/mman.h +++ b/include/asm-x86_64/mman.h @@ -37,6 +37,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h index 082a7504925e..e2d7afb679c8 100644 --- a/include/asm-xtensa/mman.h +++ b/include/asm-xtensa/mman.h @@ -73,6 +73,8 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_REMOVE 0x5 /* remove these pages & resources */ +#define MADV_DONTFORK 0x30 /* dont inherit across fork */ +#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/mm/madvise.c b/mm/madvise.c index ae0ae3ea299a..af3d573b0141 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -22,16 +22,23 @@ static long madvise_behavior(struct vm_area_struct * vma, struct mm_struct * mm = vma->vm_mm; int error = 0; pgoff_t pgoff; - int new_flags = vma->vm_flags & ~VM_READHINTMASK; + int new_flags = vma->vm_flags; switch (behavior) { + case MADV_NORMAL: + new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; + break; case MADV_SEQUENTIAL: - new_flags |= VM_SEQ_READ; + new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; break; case MADV_RANDOM: - new_flags |= VM_RAND_READ; + new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; break; - default: + case MADV_DONTFORK: + new_flags |= VM_DONTCOPY; + break; + case MADV_DOFORK: + new_flags &= ~VM_DONTCOPY; break; } @@ -177,6 +184,12 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, long error; switch (behavior) { + case MADV_DOFORK: + if (vma->vm_flags & VM_IO) { + error = -EINVAL; + break; + } + case MADV_DONTFORK: case MADV_NORMAL: case MADV_SEQUENTIAL: case MADV_RANDOM: -- cgit v1.3-6-gb490 From d6077cb80cde4506720f9165eba99ee07438513f Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Tue, 14 Feb 2006 13:53:10 -0800 Subject: [PATCH] sched: revert "filter affine wakeups" Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6: [PATCH] sched: filter affine wakeups Apparently caused more than 10% performance regression for aim7 benchmark. The setup in use is 16-cpu HP rx8620, 64Gb of memory and 12 MSA1000s with 144 disks. Each disk is 72Gb with a single ext3 filesystem (courtesy of HP, who supplied benchmark results). The problem is, for aim7, the wake-up pattern is random, but it still needs load balancing action in the wake-up path to achieve best performance. With the above commit, lack of load balancing hurts that workload. However, for workloads like database transaction processing, the requirement is exactly opposite. In the wake up path, best performance is achieved with absolutely zero load balancing. We simply wake up the process on the CPU that it was previously run. Worst performance is obtained when we do load balancing at wake up. There isn't an easy way to auto detect the workload characteristics. Ingo's earlier patch that detects idle CPU and decide whether to load balance or not doesn't perform with aim7 either since all CPUs are busy (it causes even bigger perf. regression). Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6, which causes more than 10% performance regression with aim7. Signed-off-by: Ken Chen Acked-by: Ingo Molnar Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 +---- kernel/sched.c | 10 +--------- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c1da0269a18..b6f51e3a38ec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -697,11 +697,8 @@ struct task_struct { int lock_depth; /* BKL lock depth */ -#if defined(CONFIG_SMP) - int last_waker_cpu; /* CPU that last woke this task up */ -#if defined(__ARCH_WANT_UNLOCKED_CTXSW) +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) int oncpu; -#endif #endif int prio, static_prio; struct list_head run_list; diff --git a/kernel/sched.c b/kernel/sched.c index 87d93be336a1..66d957227de9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1204,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync) } } - if (p->last_waker_cpu != this_cpu) - goto out_set_cpu; - if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) goto out_set_cpu; @@ -1277,8 +1274,6 @@ out_set_cpu: cpu = task_cpu(p); } - p->last_waker_cpu = this_cpu; - out_activate: #endif /* CONFIG_SMP */ if (old_state == TASK_UNINTERRUPTIBLE) { @@ -1360,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags) #ifdef CONFIG_SCHEDSTATS memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif -#if defined(CONFIG_SMP) - p->last_waker_cpu = cpu; -#if defined(__ARCH_WANT_UNLOCKED_CTXSW) +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) p->oncpu = 0; #endif -#endif #ifdef CONFIG_PREEMPT /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; -- cgit v1.3-6-gb490 From 68f624fc8b9fa50de9cc0ebd612ef7b7b9fa32d0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Feb 2006 13:53:18 -0800 Subject: [PATCH] FRV: Miscellaneous fixes Make various alterations and fixes to the FRV arch: (1) Resyncs the FRV system call collection with the i386 arch. (2) Discards __iounmap() as it's not used. (3) Fixes the use of the SWAP/SWAPI instruction to get the arguments the right way around in atomic.h, and also to get the asm constraints correct. (4) Moves copy_to/from_user_page() to asm/cacheflush.h to be consistent with other archs. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/entry.S | 26 +++++++++++++++++++++----- arch/frv/mm/kmap.c | 9 --------- include/asm-frv/atomic.h | 6 +++--- include/asm-frv/cacheflush.h | 12 ++++++++++++ include/asm-frv/io.h | 1 - include/asm-frv/uaccess.h | 3 --- include/asm-frv/unistd.h | 28 ++++++++++++++++++++++------ 7 files changed, 58 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 5f6548388b74..c69d499d28cf 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1418,11 +1418,27 @@ sys_call_table: .long sys_add_key .long sys_request_key .long sys_keyctl - .long sys_ni_syscall // sys_vperfctr_open - .long sys_ni_syscall // sys_vperfctr_control /* 290 */ - .long sys_ni_syscall // sys_vperfctr_unlink - .long sys_ni_syscall // sys_vperfctr_iresume - .long sys_ni_syscall // sys_vperfctr_read + .long sys_ioprio_set + .long sys_ioprio_get /* 290 */ + .long sys_inotify_init + .long sys_inotify_add_watch + .long sys_inotify_rm_watch + .long sys_migrate_pages + .long sys_openat /* 295 */ + .long sys_mkdirat + .long sys_mknodat + .long sys_fchownat + .long sys_futimesat + .long sys_newfstatat /* 300 */ + .long sys_unlinkat + .long sys_renameat + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat /* 305 */ + .long sys_fchmodat + .long sys_faccessat + .long sys_pselect6 + .long sys_ppoll syscall_table_size = (. - sys_call_table) diff --git a/arch/frv/mm/kmap.c b/arch/frv/mm/kmap.c index 539f45e6d15e..c54f18e65ea6 100644 --- a/arch/frv/mm/kmap.c +++ b/arch/frv/mm/kmap.c @@ -43,15 +43,6 @@ void iounmap(void *addr) { } -/* - * __iounmap unmaps nearly everything, so be careful - * it doesn't free currently pointer/page tables anymore but it - * wans't used anyway and might be added later. - */ -void __iounmap(void *addr, unsigned long size) -{ -} - /* * Set new cache mode for some kernel address space. * The caller must push data for that range itself, if such data may already diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h index a59f684b4f33..5d9f84bfdcad 100644 --- a/include/asm-frv/atomic.h +++ b/include/asm-frv/atomic.h @@ -220,9 +220,9 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig switch (sizeof(__xg_orig)) { \ case 4: \ asm volatile( \ - "swap%I0 %2,%M0" \ - : "+m"(*__xg_ptr), "=&r"(__xg_orig) \ - : "r"(x) \ + "swap%I0 %M0,%1" \ + : "+m"(*__xg_ptr), "=r"(__xg_orig) \ + : "1"(x) \ : "memory" \ ); \ break; \ diff --git a/include/asm-frv/cacheflush.h b/include/asm-frv/cacheflush.h index 3007deccb490..eaa5826bc1c8 100644 --- a/include/asm-frv/cacheflush.h +++ b/include/asm-frv/cacheflush.h @@ -87,5 +87,17 @@ static inline void flush_icache_page(struct vm_area_struct *vma, struct page *pa flush_icache_user_range(vma, page, page_to_phys(page), PAGE_SIZE); } +/* + * permit ptrace to access another process's address space through the icache + * and the dcache + */ +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +do { \ + memcpy((dst), (src), (len)); \ + flush_icache_user_range((vma), (page), (vaddr), (len)); \ +} while(0) + +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy((dst), (src), (len)) #endif /* _ASM_CACHEFLUSH_H */ diff --git a/include/asm-frv/io.h b/include/asm-frv/io.h index 075369b1a34b..01247cb2bc39 100644 --- a/include/asm-frv/io.h +++ b/include/asm-frv/io.h @@ -251,7 +251,6 @@ static inline void writel(uint32_t datum, volatile void __iomem *addr) #define IOMAP_WRITETHROUGH 3 extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag); -extern void __iounmap(void __iomem *addr, unsigned long size); static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size) { diff --git a/include/asm-frv/uaccess.h b/include/asm-frv/uaccess.h index b6bcbe01f6ee..a1d140438863 100644 --- a/include/asm-frv/uaccess.h +++ b/include/asm-frv/uaccess.h @@ -306,7 +306,4 @@ extern long strnlen_user(const char *src, long count); extern unsigned long search_exception_table(unsigned long addr); -#define copy_to_user_page(vma, page, vaddr, dst, src, len) memcpy(dst, src, len) -#define copy_from_user_page(vma, page, vaddr, dst, src, len) memcpy(dst, src, len) - #endif /* _ASM_UACCESS_H */ diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index 4d994d2e99e3..322531caa484 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -295,13 +295,29 @@ #define __NR_add_key 286 #define __NR_request_key 287 #define __NR_keyctl 288 -#define __NR_vperfctr_open 289 -#define __NR_vperfctr_control (__NR_perfctr_info+1) -#define __NR_vperfctr_unlink (__NR_perfctr_info+2) -#define __NR_vperfctr_iresume (__NR_perfctr_info+3) -#define __NR_vperfctr_read (__NR_perfctr_info+4) +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_newfstatat 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 -#define NR_syscalls 294 +#define NR_syscalls 310 /* * process the return value of a syscall, consigning it to one of two possible fates -- cgit v1.3-6-gb490 From 28baebae73c3ea8b75c7cae225a7db817ab825a9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Feb 2006 13:53:20 -0800 Subject: [PATCH] FRV: Use virtual interrupt disablement Make the FRV arch use virtual interrupt disablement because accesses to the processor status register (PSR) are relatively slow and because we will soon have the need to deal with multiple interrupt controls at the same time (separate h/w and inter-core interrupts). The way this is done is to dedicate one of the four integer condition code registers (ICC2) to maintaining a virtual interrupt disablement state whilst inside the kernel. This uses the ICC2.Z flag (Zero) to indicate whether the interrupts are virtually disabled and the ICC2.C flag (Carry) to indicate whether the interrupts are physically disabled. ICC2.Z is set to indicate interrupts are virtually disabled. ICC2.C is set to indicate interrupts are physically enabled. Under normal running conditions Z==0 and C==1. Disabling interrupts with local_irq_disable() doesn't then actually physically disable interrupts - it merely sets ICC2.Z to 1. Should an interrupt then happen, the exception prologue will note ICC2.Z is set and branch out of line using one instruction (an unlikely BEQ). Here it will physically disable interrupts and clear ICC2.C. When it comes time to enable interrupts (local_irq_enable()), this simply clears the ICC2.Z flag and invokes a trap #2 if both Z and C flags are clear (the HI integer condition). This can be done with the TIHI conditional trap instruction. The trap then physically reenables interrupts and sets ICC2.C again. Upon returning the interrupt will be taken as interrupts will then be enabled. Note that whilst processing the trap, the whole exceptions system is disabled, and so an interrupt can't happen till it returns. If no pending interrupt had happened, ICC2.C would still be set, the HI condition would not be fulfilled, and no trap will happen. Saving interrupts (local_irq_save) is simply a matter of pulling the ICC2.Z flag out of the CCR register, shifting it down and masking it off. This gives a result of 0 if interrupts were enabled and 1 if they weren't. Restoring interrupts (local_irq_restore) is then a matter of taking the saved value mentioned previously and XOR'ing it against 1. If it was one, the result will be zero, and if it was zero the result will be non-zero. This result is then used to affect the ICC2.Z flag directly (it is a condition code flag after all). An XOR instruction does not affect the Carry flag, and so that bit of state is unchanged. The two flags can then be sampled to see if they're both zero using the trap (TIHI) as for the unconditional reenablement (local_irq_enable). This patch also: (1) Modifies the debugging stub (break.S) to handle single-stepping crossing into the trap #2 handler and into virtually disabled interrupts. (2) Removes superseded fixup pointers from the second instructions in the trap tables (there's no a separate fixup table for this). (3) Declares the trap #3 vector for use in .org directives in the trap table. (4) Moves irq_enter() and irq_exit() in do_IRQ() to avoid problems with virtual interrupt handling, and removes the duplicate code that has now been folded into irq_exit() (softirq and preemption handling). (5) Tells the compiler in the arch Makefile that ICC2 is now reserved. (6) Documents the in-kernel ABI, including the virtual interrupts. (7) Renames the old irq management functions to different names. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/fujitsu/frv/kernel-ABI.txt | 234 +++++++++++++++++++++++++++++++ arch/frv/Makefile | 2 +- arch/frv/kernel/break.S | 77 +++++++++- arch/frv/kernel/entry-table.S | 39 +++++- arch/frv/kernel/entry.S | 65 ++++++++- arch/frv/kernel/head.S | 3 + arch/frv/kernel/irq.c | 41 +----- include/asm-frv/spr-regs.h | 1 + include/asm-frv/system.h | 88 +++++++++++- 9 files changed, 489 insertions(+), 61 deletions(-) create mode 100644 Documentation/fujitsu/frv/kernel-ABI.txt (limited to 'include') diff --git a/Documentation/fujitsu/frv/kernel-ABI.txt b/Documentation/fujitsu/frv/kernel-ABI.txt new file mode 100644 index 000000000000..0ed9b0a779bc --- /dev/null +++ b/Documentation/fujitsu/frv/kernel-ABI.txt @@ -0,0 +1,234 @@ + ================================= + INTERNAL KERNEL ABI FOR FR-V ARCH + ================================= + +The internal FRV kernel ABI is not quite the same as the userspace ABI. A number of the registers +are used for special purposed, and the ABI is not consistent between modules vs core, and MMU vs +no-MMU. + +This partly stems from the fact that FRV CPUs do not have a separate supervisor stack pointer, and +most of them do not have any scratch registers, thus requiring at least one general purpose +register to be clobbered in such an event. Also, within the kernel core, it is possible to simply +jump or call directly between functions using a relative offset. This cannot be extended to modules +for the displacement is likely to be too far. Thus in modules the address of a function to call +must be calculated in a register and then used, requiring two extra instructions. + +This document has the following sections: + + (*) System call register ABI + (*) CPU operating modes + (*) Internal kernel-mode register ABI + (*) Internal debug-mode register ABI + (*) Virtual interrupt handling + + +======================== +SYSTEM CALL REGISTER ABI +======================== + +When a system call is made, the following registers are effective: + + REGISTERS CALL RETURN + =============== ======================= ======================= + GR7 System call number Preserved + GR8 Syscall arg #1 Return value + GR9-GR13 Syscall arg #2-6 Preserved + + +=================== +CPU OPERATING MODES +=================== + +The FR-V CPU has three basic operating modes. In order of increasing capability: + + (1) User mode. + + Basic userspace running mode. + + (2) Kernel mode. + + Normal kernel mode. There are many additional control registers available that may be + accessed in this mode, in addition to all the stuff available to user mode. This has two + submodes: + + (a) Exceptions enabled (PSR.T == 1). + + Exceptions will invoke the appropriate normal kernel mode handler. On entry to the + handler, the PSR.T bit will be cleared. + + (b) Exceptions disabled (PSR.T == 0). + + No exceptions or interrupts may happen. Any mandatory exceptions will cause the CPU to + halt unless the CPU is told to jump into debug mode instead. + + (3) Debug mode. + + No exceptions may happen in this mode. Memory protection and management exceptions will be + flagged for later consideration, but the exception handler won't be invoked. Debugging traps + such as hardware breakpoints and watchpoints will be ignored. This mode is entered only by + debugging events obtained from the other two modes. + + All kernel mode registers may be accessed, plus a few extra debugging specific registers. + + +================================= +INTERNAL KERNEL-MODE REGISTER ABI +================================= + +There are a number of permanent register assignments that are set up by entry.S in the exception +prologue. Note that there is a complete set of exception prologues for each of user->kernel +transition and kernel->kernel transition. There are also user->debug and kernel->debug mode +transition prologues. + + + REGISTER FLAVOUR USE + =============== ======= ==================================================== + GR1 Supervisor stack pointer + GR15 Current thread info pointer + GR16 GP-Rel base register for small data + GR28 Current exception frame pointer (__frame) + GR29 Current task pointer (current) + GR30 Destroyed by kernel mode entry + GR31 NOMMU Destroyed by debug mode entry + GR31 MMU Destroyed by TLB miss kernel mode entry + CCR.ICC2 Virtual interrupt disablement tracking + CCCR.CC3 Cleared by exception prologue (atomic op emulation) + SCR0 MMU See mmu-layout.txt. + SCR1 MMU See mmu-layout.txt. + SCR2 MMU Save for EAR0 (destroyed by icache insns in debug mode) + SCR3 MMU Save for GR31 during debug exceptions + DAMR/IAMR NOMMU Fixed memory protection layout. + DAMR/IAMR MMU See mmu-layout.txt. + + +Certain registers are also used or modified across function calls: + + REGISTER CALL RETURN + =============== =============================== =============================== + GR0 Fixed Zero - + GR2 Function call frame pointer + GR3 Special Preserved + GR3-GR7 - Clobbered + GR8 Function call arg #1 Return value (or clobbered) + GR9 Function call arg #2 Return value MSW (or clobbered) + GR10-GR13 Function call arg #3-#6 Clobbered + GR14 - Clobbered + GR15-GR16 Special Preserved + GR17-GR27 - Preserved + GR28-GR31 Special Only accessed explicitly + LR Return address after CALL Clobbered + CCR/CCCR - Mostly Clobbered + + +================================ +INTERNAL DEBUG-MODE REGISTER ABI +================================ + +This is the same as the kernel-mode register ABI for functions calls. The difference is that in +debug-mode there's a different stack and a different exception frame. Almost all the global +registers from kernel-mode (including the stack pointer) may be changed. + + REGISTER FLAVOUR USE + =============== ======= ==================================================== + GR1 Debug stack pointer + GR16 GP-Rel base register for small data + GR31 Current debug exception frame pointer (__debug_frame) + SCR3 MMU Saved value of GR31 + + +Note that debug mode is able to interfere with the kernel's emulated atomic ops, so it must be +exceedingly careful not to do any that would interact with the main kernel in this regard. Hence +the debug mode code (gdbstub) is almost completely self-contained. The only external code used is +the sprintf family of functions. + +Futhermore, break.S is so complicated because single-step mode does not switch off on entry to an +exception. That means unless manually disabled, single-stepping will blithely go on stepping into +things like interrupts. See gdbstub.txt for more information. + + +========================== +VIRTUAL INTERRUPT HANDLING +========================== + +Because accesses to the PSR is so slow, and to disable interrupts we have to access it twice (once +to read and once to write), we don't actually disable interrupts at all if we don't have to. What +we do instead is use the ICC2 condition code flags to note virtual disablement, such that if we +then do take an interrupt, we note the flag, really disable interrupts, set another flag and resume +execution at the point the interrupt happened. Setting condition flags as a side effect of an +arithmetic or logical instruction is really fast. This use of the ICC2 only occurs within the +kernel - it does not affect userspace. + +The flags we use are: + + (*) CCR.ICC2.Z [Zero flag] + + Set to virtually disable interrupts, clear when interrupts are virtually enabled. Can be + modified by logical instructions without affecting the Carry flag. + + (*) CCR.ICC2.C [Carry flag] + + Clear to indicate hardware interrupts are really disabled, set otherwise. + + +What happens is this: + + (1) Normal kernel-mode operation. + + ICC2.Z is 0, ICC2.C is 1. + + (2) An interrupt occurs. The exception prologue examines ICC2.Z and determines that nothing needs + doing. This is done simply with an unlikely BEQ instruction. + + (3) The interrupts are disabled (local_irq_disable) + + ICC2.Z is set to 1. + + (4) If interrupts were then re-enabled (local_irq_enable): + + ICC2.Z would be set to 0. + + A TIHI #2 instruction (trap #2 if condition HI - Z==0 && C==0) would be used to trap if + interrupts were now virtually enabled, but physically disabled - which they're not, so the + trap isn't taken. The kernel would then be back to state (1). + + (5) An interrupt occurs. The exception prologue examines ICC2.Z and determines that the interrupt + shouldn't actually have happened. It jumps aside, and there disabled interrupts by setting + PSR.PIL to 14 and then it clears ICC2.C. + + (6) If interrupts were then saved and disabled again (local_irq_save): + + ICC2.Z would be shifted into the save variable and masked off (giving a 1). + + ICC2.Z would then be set to 1 (thus unchanged), and ICC2.C would be unaffected (ie: 0). + + (7) If interrupts were then restored from state (6) (local_irq_restore): + + ICC2.Z would be set to indicate the result of XOR'ing the saved value (ie: 1) with 1, which + gives a result of 0 - thus leaving ICC2.Z set. + + ICC2.C would remain unaffected (ie: 0). + + A TIHI #2 instruction would be used to again assay the current state, but this would do + nothing as Z==1. + + (8) If interrupts were then enabled (local_irq_enable): + + ICC2.Z would be cleared. ICC2.C would be left unaffected. Both flags would now be 0. + + A TIHI #2 instruction again issued to assay the current state would then trap as both Z==0 + [interrupts virtually enabled] and C==0 [interrupts really disabled] would then be true. + + (9) The trap #2 handler would simply enable hardware interrupts (set PSR.PIL to 0), set ICC2.C to + 1 and return. + +(10) Immediately upon returning, the pending interrupt would be taken. + +(11) The interrupt handler would take the path of actually processing the interrupt (ICC2.Z is + clear, BEQ fails as per step (2)). + +(12) The interrupt handler would then set ICC2.C to 1 since hardware interrupts are definitely + enabled - or else the kernel wouldn't be here. + +(13) On return from the interrupt handler, things would be back to state (1). + +This trap (#2) is only available in kernel mode. In user mode it will result in SIGILL. diff --git a/arch/frv/Makefile b/arch/frv/Makefile index 90c0fb8d9dc3..d163747d17c0 100644 --- a/arch/frv/Makefile +++ b/arch/frv/Makefile @@ -81,7 +81,7 @@ endif # - reserve CC3 for use with atomic ops # - all the extra registers are dealt with only at context switch time CFLAGS += -mno-fdpic -mgpr-32 -msoft-float -mno-media -CFLAGS += -ffixed-fcc3 -ffixed-cc3 -ffixed-gr15 +CFLAGS += -ffixed-fcc3 -ffixed-cc3 -ffixed-gr15 -ffixed-icc2 AFLAGS += -mno-fdpic ASFLAGS += -mno-fdpic diff --git a/arch/frv/kernel/break.S b/arch/frv/kernel/break.S index 33233dc23e29..687c48d62dde 100644 --- a/arch/frv/kernel/break.S +++ b/arch/frv/kernel/break.S @@ -200,12 +200,20 @@ __break_step: movsg bpcsr,gr2 sethi.p %hi(__entry_kernel_external_interrupt),gr3 setlo %lo(__entry_kernel_external_interrupt),gr3 - subcc gr2,gr3,gr0,icc0 + subcc.p gr2,gr3,gr0,icc0 + sethi %hi(__entry_uspace_external_interrupt),gr3 + setlo.p %lo(__entry_uspace_external_interrupt),gr3 beq icc0,#2,__break_step_kernel_external_interrupt - sethi.p %hi(__entry_uspace_external_interrupt),gr3 - setlo %lo(__entry_uspace_external_interrupt),gr3 - subcc gr2,gr3,gr0,icc0 + subcc.p gr2,gr3,gr0,icc0 + sethi %hi(__entry_kernel_external_interrupt_virtually_disabled),gr3 + setlo.p %lo(__entry_kernel_external_interrupt_virtually_disabled),gr3 beq icc0,#2,__break_step_uspace_external_interrupt + subcc.p gr2,gr3,gr0,icc0 + sethi %hi(__entry_kernel_external_interrupt_virtual_reenable),gr3 + setlo.p %lo(__entry_kernel_external_interrupt_virtual_reenable),gr3 + beq icc0,#2,__break_step_kernel_external_interrupt_virtually_disabled + subcc gr2,gr3,gr0,icc0 + beq icc0,#2,__break_step_kernel_external_interrupt_virtual_reenable LEDS 0x2007,gr2 @@ -254,6 +262,9 @@ __break_step_kernel_softprog_interrupt: # step through an external interrupt from kernel mode .globl __break_step_kernel_external_interrupt __break_step_kernel_external_interrupt: + # deal with virtual interrupt disablement + beq icc2,#0,__break_step_kernel_external_interrupt_virtually_disabled + sethi.p %hi(__entry_kernel_external_interrupt_reentry),gr3 setlo %lo(__entry_kernel_external_interrupt_reentry),gr3 @@ -294,6 +305,64 @@ __break_return_as_kernel_prologue: #endif rett #1 +# we single-stepped into an interrupt handler whilst interrupts were merely virtually disabled +# need to really disable interrupts, set flag, fix up and return +__break_step_kernel_external_interrupt_virtually_disabled: + movsg psr,gr2 + andi gr2,#~PSR_PIL,gr2 + ori gr2,#PSR_PIL_14,gr2 /* debugging interrupts only */ + movgs gr2,psr + + ldi @(gr31,#REG_CCR),gr3 + movgs gr3,ccr + subcc.p gr0,gr0,gr0,icc2 /* leave Z set, clear C */ + + # exceptions must've been enabled and we must've been in supervisor mode + setlos BPSR_BET|BPSR_BS,gr3 + movgs gr3,bpsr + + # return to where the interrupt happened + movsg pcsr,gr2 + movgs gr2,bpcsr + + lddi.p @(gr31,#REG_GR(2)),gr2 + + xor gr31,gr31,gr31 + movgs gr0,brr +#ifdef CONFIG_MMU + movsg scr3,gr31 +#endif + rett #1 + +# we stepped through into the virtual interrupt reenablement trap +# +# we also want to single step anyway, but after fixing up so that we get an event on the +# instruction after the broken-into exception returns + .globl __break_step_kernel_external_interrupt_virtual_reenable +__break_step_kernel_external_interrupt_virtual_reenable: + movsg psr,gr2 + andi gr2,#~PSR_PIL,gr2 + movgs gr2,psr + + ldi @(gr31,#REG_CCR),gr3 + movgs gr3,ccr + subicc gr0,#1,gr0,icc2 /* clear Z, set C */ + + # save the adjusted ICC2 + movsg ccr,gr3 + sti gr3,@(gr31,#REG_CCR) + + # exceptions must've been enabled and we must've been in supervisor mode + setlos BPSR_BET|BPSR_BS,gr3 + movgs gr3,bpsr + + # return to where the trap happened + movsg pcsr,gr2 + movgs gr2,bpcsr + + # and then process the single step + bra __break_continue + # step through an internal exception from uspace mode .globl __break_step_uspace_softprog_interrupt __break_step_uspace_softprog_interrupt: diff --git a/arch/frv/kernel/entry-table.S b/arch/frv/kernel/entry-table.S index 9b9243e2103c..81568acea9cd 100644 --- a/arch/frv/kernel/entry-table.S +++ b/arch/frv/kernel/entry-table.S @@ -116,6 +116,8 @@ __break_kerneltrap_fixup_table: .long __break_step_uspace_external_interrupt .section .trap.kernel .org \tbr_tt + # deal with virtual interrupt disablement + beq icc2,#0,__entry_kernel_external_interrupt_virtually_disabled bra __entry_kernel_external_interrupt .section .trap.fixup.kernel .org \tbr_tt >> 2 @@ -259,25 +261,52 @@ __trap_fixup_kernel_data_tlb_miss: .org TBR_TT_TRAP0 .rept 127 bra __entry_uspace_softprog_interrupt - bra __break_step_uspace_softprog_interrupt - .long 0,0 + .long 0,0,0 .endr .org TBR_TT_BREAK bra __entry_break .long 0,0,0 + .section .trap.fixup.user + .org TBR_TT_TRAP0 >> 2 + .rept 127 + .long __break_step_uspace_softprog_interrupt + .endr + .org TBR_TT_BREAK >> 2 + .long 0 + # miscellaneous kernel mode entry points .section .trap.kernel .org TBR_TT_TRAP0 - .rept 127 bra __entry_kernel_softprog_interrupt - bra __break_step_kernel_softprog_interrupt - .long 0,0 + .org TBR_TT_TRAP1 + bra __entry_kernel_softprog_interrupt + + # trap #2 in kernel - reenable interrupts + .org TBR_TT_TRAP2 + bra __entry_kernel_external_interrupt_virtual_reenable + + # miscellaneous kernel traps + .org TBR_TT_TRAP3 + .rept 124 + bra __entry_kernel_softprog_interrupt + .long 0,0,0 .endr .org TBR_TT_BREAK bra __entry_break .long 0,0,0 + .section .trap.fixup.kernel + .org TBR_TT_TRAP0 >> 2 + .long __break_step_kernel_softprog_interrupt + .long __break_step_kernel_softprog_interrupt + .long __break_step_kernel_external_interrupt_virtual_reenable + .rept 124 + .long __break_step_kernel_softprog_interrupt + .endr + .org TBR_TT_BREAK >> 2 + .long 0 + # miscellaneous debug mode entry points .section .trap.break .org TBR_TT_BREAK diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index c69d499d28cf..1d21c8d34d8a 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -141,7 +141,10 @@ __entry_uspace_external_interrupt_reentry: movsg gner0,gr4 movsg gner1,gr5 - stdi gr4,@(gr28,#REG_GNER0) + stdi.p gr4,@(gr28,#REG_GNER0) + + # interrupts start off fully disabled in the interrupt handler + subcc gr0,gr0,gr0,icc2 /* set Z and clear C */ # set up kernel global registers sethi.p %hi(__kernel_current_task),gr5 @@ -193,9 +196,8 @@ __entry_uspace_external_interrupt_reentry: .type __entry_kernel_external_interrupt,@function __entry_kernel_external_interrupt: LEDS 0x6210 - - sub sp,gr15,gr31 - LEDS32 +// sub sp,gr15,gr31 +// LEDS32 # set up the stack pointer or.p sp,gr0,gr30 @@ -231,7 +233,10 @@ __entry_kernel_external_interrupt_reentry: stdi gr24,@(gr28,#REG_GR(24)) stdi gr26,@(gr28,#REG_GR(26)) sti gr29,@(gr28,#REG_GR(29)) - stdi gr30,@(gr28,#REG_GR(30)) + stdi.p gr30,@(gr28,#REG_GR(30)) + + # note virtual interrupts will be fully enabled upon return + subicc gr0,#1,gr0,icc2 /* clear Z, set C */ movsg tbr ,gr20 movsg psr ,gr22 @@ -267,7 +272,10 @@ __entry_kernel_external_interrupt_reentry: movsg gner0,gr4 movsg gner1,gr5 - stdi gr4,@(gr28,#REG_GNER0) + stdi.p gr4,@(gr28,#REG_GNER0) + + # interrupts start off fully disabled in the interrupt handler + subcc gr0,gr0,gr0,icc2 /* set Z and clear C */ # set the return address sethi.p %hi(__entry_return_from_kernel_interrupt),gr4 @@ -291,6 +299,45 @@ __entry_kernel_external_interrupt_reentry: .size __entry_kernel_external_interrupt,.-__entry_kernel_external_interrupt +############################################################################### +# +# deal with interrupts that were actually virtually disabled +# - we need to really disable them, flag the fact and return immediately +# - if you change this, you must alter break.S also +# +############################################################################### + .balign L1_CACHE_BYTES + .globl __entry_kernel_external_interrupt_virtually_disabled + .type __entry_kernel_external_interrupt_virtually_disabled,@function +__entry_kernel_external_interrupt_virtually_disabled: + movsg psr,gr30 + andi gr30,#~PSR_PIL,gr30 + ori gr30,#PSR_PIL_14,gr30 ; debugging interrupts only + movgs gr30,psr + subcc gr0,gr0,gr0,icc2 ; leave Z set, clear C + rett #0 + + .size __entry_kernel_external_interrupt_virtually_disabled,.-__entry_kernel_external_interrupt_virtually_disabled + +############################################################################### +# +# deal with re-enablement of interrupts that were pending when virtually re-enabled +# - set ICC2.C, re-enable the real interrupts and return +# - we can clear ICC2.Z because we shouldn't be here if it's not 0 [due to TIHI] +# - if you change this, you must alter break.S also +# +############################################################################### + .balign L1_CACHE_BYTES + .globl __entry_kernel_external_interrupt_virtual_reenable + .type __entry_kernel_external_interrupt_virtual_reenable,@function +__entry_kernel_external_interrupt_virtual_reenable: + movsg psr,gr30 + andi gr30,#~PSR_PIL,gr30 ; re-enable interrupts + movgs gr30,psr + subicc gr0,#1,gr0,icc2 ; clear Z, set C + rett #0 + + .size __entry_kernel_external_interrupt_virtual_reenable,.-__entry_kernel_external_interrupt_virtual_reenable ############################################################################### # @@ -335,6 +382,7 @@ __entry_uspace_softprog_interrupt_reentry: sethi.p %hi(__entry_return_from_user_exception),gr23 setlo %lo(__entry_return_from_user_exception),gr23 + bra __entry_common .size __entry_uspace_softprog_interrupt,.-__entry_uspace_softprog_interrupt @@ -495,7 +543,10 @@ __entry_common: movsg gner0,gr4 movsg gner1,gr5 - stdi gr4,@(gr28,#REG_GNER0) + stdi.p gr4,@(gr28,#REG_GNER0) + + # set up virtual interrupt disablement + subicc gr0,#1,gr0,icc2 /* clear Z flag, set C flag */ # set up kernel global registers sethi.p %hi(__kernel_current_task),gr5 diff --git a/arch/frv/kernel/head.S b/arch/frv/kernel/head.S index c73b4fe9f6ca..29a5265489b7 100644 --- a/arch/frv/kernel/head.S +++ b/arch/frv/kernel/head.S @@ -513,6 +513,9 @@ __head_mmu_enabled: movgs gr0,ccr movgs gr0,cccr + # initialise the virtual interrupt handling + subcc gr0,gr0,gr0,icc2 /* set Z, clear C */ + #ifdef CONFIG_MMU movgs gr3,scr2 movgs gr3,scr3 diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 59580c59c62c..27ab4c30aac6 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -287,18 +287,11 @@ asmlinkage void do_IRQ(void) struct irq_source *source; int level, cpu; + irq_enter(); + level = (__frame->tbr >> 4) & 0xf; cpu = smp_processor_id(); -#if 0 - { - static u32 irqcount; - *(volatile u32 *) 0xe1200004 = ~((irqcount++ << 8) | level); - *(volatile u16 *) 0xffc00100 = (u16) ~0x9999; - mb(); - } -#endif - if ((unsigned long) __frame - (unsigned long) (current + 1) < 512) BUG(); @@ -308,40 +301,12 @@ asmlinkage void do_IRQ(void) kstat_this_cpu.irqs[level]++; - irq_enter(); - for (source = frv_irq_levels[level].sources; source; source = source->next) source->doirq(source); - irq_exit(); - __clr_MASK(level); - /* only process softirqs if we didn't interrupt another interrupt handler */ - if ((__frame->psr & PSR_PIL) == PSR_PIL_0) - if (local_softirq_pending()) - do_softirq(); - -#ifdef CONFIG_PREEMPT - local_irq_disable(); - while (--current->preempt_count == 0) { - if (!(__frame->psr & PSR_S) || - current->need_resched == 0 || - in_interrupt()) - break; - current->preempt_count++; - local_irq_enable(); - preempt_schedule(); - local_irq_disable(); - } -#endif - -#if 0 - { - *(volatile u16 *) 0xffc00100 = (u16) ~0x6666; - mb(); - } -#endif + irq_exit(); } /* end do_IRQ() */ diff --git a/include/asm-frv/spr-regs.h b/include/asm-frv/spr-regs.h index ef472f058d9c..c2a541ef828d 100644 --- a/include/asm-frv/spr-regs.h +++ b/include/asm-frv/spr-regs.h @@ -98,6 +98,7 @@ #define TBR_TT_TRAP0 (0x80 << 4) #define TBR_TT_TRAP1 (0x81 << 4) #define TBR_TT_TRAP2 (0x82 << 4) +#define TBR_TT_TRAP3 (0x83 << 4) #define TBR_TT_TRAP126 (0xfe << 4) #define TBR_TT_BREAK (0xff << 4) diff --git a/include/asm-frv/system.h b/include/asm-frv/system.h index d2aea70a5f64..f72ff0c4dc0b 100644 --- a/include/asm-frv/system.h +++ b/include/asm-frv/system.h @@ -40,8 +40,84 @@ do { \ /* * interrupt flag manipulation + * - use virtual interrupt management since touching the PSR is slow + * - ICC2.Z: T if interrupts virtually disabled + * - ICC2.C: F if interrupts really disabled + * - if Z==1 upon interrupt: + * - C is set to 0 + * - interrupts are really disabled + * - entry.S returns immediately + * - uses TIHI (TRAP if Z==0 && C==0) #2 to really reenable interrupts + * - if taken, the trap: + * - sets ICC2.C + * - enables interrupts */ -#define local_irq_disable() \ +#define local_irq_disable() \ +do { \ + /* set Z flag, but don't change the C flag */ \ + asm volatile(" andcc gr0,gr0,gr0,icc2 \n" \ + : \ + : \ + : "memory", "icc2" \ + ); \ +} while(0) + +#define local_irq_enable() \ +do { \ + /* clear Z flag and then test the C flag */ \ + asm volatile(" oricc gr0,#1,gr0,icc2 \n" \ + " tihi icc2,gr0,#2 \n" \ + : \ + : \ + : "memory", "icc2" \ + ); \ +} while(0) + +#define local_save_flags(flags) \ +do { \ + typecheck(unsigned long, flags); \ + asm volatile("movsg ccr,%0" \ + : "=r"(flags) \ + : \ + : "memory"); \ + \ + /* shift ICC2.Z to bit 0 */ \ + flags >>= 26; \ + \ + /* make flags 1 if interrupts disabled, 0 otherwise */ \ + flags &= 1UL; \ +} while(0) + +#define irqs_disabled() \ + ({unsigned long flags; local_save_flags(flags); flags; }) + +#define local_irq_save(flags) \ +do { \ + typecheck(unsigned long, flags); \ + local_save_flags(flags); \ + local_irq_disable(); \ +} while(0) + +#define local_irq_restore(flags) \ +do { \ + typecheck(unsigned long, flags); \ + \ + /* load the Z flag by turning 1 if disabled into 0 if disabled \ + * and thus setting the Z flag but not the C flag */ \ + asm volatile(" xoricc %0,#1,gr0,icc2 \n" \ + /* then test Z=0 and C=0 */ \ + " tihi icc2,gr0,#2 \n" \ + : \ + : "r"(flags) \ + : "memory", "icc2" \ + ); \ + \ +} while(0) + +/* + * real interrupt flag manipulation + */ +#define __local_irq_disable() \ do { \ unsigned long psr; \ asm volatile(" movsg psr,%0 \n" \ @@ -53,7 +129,7 @@ do { \ : "memory"); \ } while(0) -#define local_irq_enable() \ +#define __local_irq_enable() \ do { \ unsigned long psr; \ asm volatile(" movsg psr,%0 \n" \ @@ -64,7 +140,7 @@ do { \ : "memory"); \ } while(0) -#define local_save_flags(flags) \ +#define __local_save_flags(flags) \ do { \ typecheck(unsigned long, flags); \ asm("movsg psr,%0" \ @@ -73,7 +149,7 @@ do { \ : "memory"); \ } while(0) -#define local_irq_save(flags) \ +#define __local_irq_save(flags) \ do { \ unsigned long npsr; \ typecheck(unsigned long, flags); \ @@ -86,7 +162,7 @@ do { \ : "memory"); \ } while(0) -#define local_irq_restore(flags) \ +#define __local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ asm volatile(" movgs %0,psr \n" \ @@ -95,7 +171,7 @@ do { \ : "memory"); \ } while(0) -#define irqs_disabled() \ +#define __irqs_disabled() \ ((__get_PSR() & PSR_PIL) >= PSR_PIL_14) /* -- cgit v1.3-6-gb490 From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 01:34:23 -0800 Subject: [NETFILTER]: Fix xfrm lookup after SNAT To find out if a packet needs to be handled by IPsec after SNAT, packets are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This breaks SNAT of non-unicast packets to non-local addresses because the packet is routed as incoming packet and no neighbour entry is bound to the dst_entry. In general, it seems to be a bad idea to replace the dst_entry after the packet was already sent to the output routine because its state might not match what's expected. This patch changes the xfrm lookup in POST_ROUTING to re-use the original dst_entry without routing the packet again. This means no policy routing can be used for transport mode transforms (which keep the original route) when packets are SNATed to match the policy, but it looks like the best we can do for now. Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4.h | 2 +- net/ipv4/netfilter.c | 41 ++++++++++++++++++++++++++++++++++ net/ipv4/netfilter/ip_nat_standalone.c | 6 ++--- 3 files changed, 45 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index fdc4a9527343..43c09d790b83 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -79,7 +79,7 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); - +extern int ip_xfrm_me_harder(struct sk_buff **pskb); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 52a3d7c57907..ed42cdc57cd9 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -78,6 +78,47 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +#ifdef CONFIG_XFRM +int ip_xfrm_me_harder(struct sk_buff **pskb) +{ + struct flowi fl; + unsigned int hh_len; + struct dst_entry *dst; + + if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) + return 0; + if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) + return -1; + + dst = (*pskb)->dst; + if (dst->xfrm) + dst = ((struct xfrm_dst *)dst)->route; + dst_hold(dst); + + if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) + return -1; + + dst_release((*pskb)->dst); + (*pskb)->dst = dst; + + /* Change in oif may mean change in hh_len. */ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + return 0; +} +EXPORT_SYMBOL(ip_xfrm_me_harder); +#endif + void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(ip_nat_decode_session); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 92c54999a19d..7c3f7d380240 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -235,19 +235,19 @@ ip_nat_out(unsigned int hooknum, return NF_ACCEPT; ret = ip_nat_fn(hooknum, pskb, in, out, okfn); +#ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.ip != ct->tuplehash[!dir].tuple.dst.ip -#ifdef CONFIG_XFRM || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all -#endif ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; } +#endif return ret; } -- cgit v1.3-6-gb490 From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 15 Feb 2006 22:50:10 +0300 Subject: [PATCH] fix zap_thread's ptrace related problems 1. The tracee can go from ptrace_stop() to do_signal_stop() after __ptrace_unlink(p). 2. It is unsafe to __ptrace_unlink(p) while p->parent may wait for tasklist_lock in ptrace_detach(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Eric W. Biederman Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/ptrace.h | 1 + kernel/ptrace.c | 25 +++++++++++++++---------- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 055378d2513e..0e1c95074d42 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm) do_each_thread(g,p) { if (mm == p->mm && p != tsk && p->ptrace && p->parent->mm == mm) { - __ptrace_unlink(p); + __ptrace_detach(p, 0); } } while_each_thread(g,p); write_unlock_irq(&tasklist_lock); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 9d5cd106b344..0d36750fc0f1 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern int ptrace_attach(struct task_struct *tsk); extern int ptrace_detach(struct task_struct *, unsigned int); +extern void __ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); extern int ptrace_request(struct task_struct *child, long request, long addr, long data); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d2cf144d0af5..d95a72c9279d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child) */ void __ptrace_unlink(task_t *child) { - if (!child->ptrace) - BUG(); + BUG_ON(!child->ptrace); + child->ptrace = 0; if (!list_empty(&child->ptrace_list)) { list_del_init(&child->ptrace_list); @@ -184,22 +184,27 @@ bad: return retval; } +void __ptrace_detach(struct task_struct *child, unsigned int data) +{ + child->exit_code = data; + /* .. re-parent .. */ + __ptrace_unlink(child); + /* .. and wake it up. */ + if (child->exit_state != EXIT_ZOMBIE) + wake_up_process(child); +} + int ptrace_detach(struct task_struct *child, unsigned int data) { if (!valid_signal(data)) - return -EIO; + return -EIO; /* Architecture-specific hardware disable .. */ ptrace_disable(child); - /* .. re-parent .. */ - child->exit_code = data; - write_lock_irq(&tasklist_lock); - __ptrace_unlink(child); - /* .. and wake it up. */ - if (child->exit_state != EXIT_ZOMBIE) - wake_up_process(child); + if (child->ptrace) + __ptrace_detach(child, data); write_unlock_irq(&tasklist_lock); return 0; -- cgit v1.3-6-gb490 From 8ed9b2c7a804335004e4bd3b4c6989c5b6bc243f Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 13 Feb 2006 05:29:57 -0500 Subject: [IA64-SGI] sn2 minor fixes and cleanups General SN2 code cleanup: - Do not initialize global variables to zero - Use kzalloc instead of kmalloc+memset - Check kmalloc return values - Do not obfuscate spin lock calls - Remove some unused code - Various formatting cleanups Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 96 ++++++++++++++------------------- arch/ia64/sn/kernel/sn2/prominfo_proc.c | 25 ++++----- arch/ia64/sn/kernel/sn2/sn2_smp.c | 35 ++++++------ arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 22 ++++---- arch/ia64/sn/kernel/tiocx.c | 4 +- arch/ia64/sn/pci/pci_dma.c | 16 +++--- arch/ia64/sn/pci/pcibr/pcibr_ate.c | 29 +++------- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 14 ++--- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 9 ++-- include/asm-ia64/sn/bte.h | 6 +-- include/asm-ia64/sn/pcibr_provider.h | 14 +---- include/asm-ia64/sn/sn_feature_sets.h | 3 -- 12 files changed, 111 insertions(+), 162 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 3437c2390429..dfb3f2902379 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -23,6 +23,10 @@ #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); + static struct list_head sn_sysdata_list; /* sysdata list struct */ @@ -40,12 +44,12 @@ struct brick { struct slab_info slab_info[MAX_SLABS + 1]; }; -int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ +int sn_ioif_inited; /* SN I/O infrastructure initialized? */ struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ -static int max_segment_number = 0; /* Default highest segment number */ -static int max_pcibus_number = 255; /* Default highest pci bus number */ +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ /* * Hooks and struct for unsupported pci providers @@ -84,7 +88,6 @@ static inline u64 sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -94,7 +97,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, (u64) nasid, (u64) widget_num, (u64) device_num, (u64) address, 0, 0, 0); return ret_stuff.status; - } /* @@ -102,7 +104,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, */ static inline u64 sal_get_hubdev_info(u64 handle, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -118,7 +119,6 @@ static inline u64 sal_get_hubdev_info(u64 handle, u64 address) */ static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -215,7 +215,7 @@ static void __init sn_fixup_ionodes(void) struct hubdev_info *hubdev; u64 status; u64 nasid; - int i, widget, device; + int i, widget, device, size; /* * Get SGI Specific HUB chipset information. @@ -251,48 +251,37 @@ static void __init sn_fixup_ionodes(void) if (!hubdev->hdi_flush_nasid_list.widget_p) continue; + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); hubdev->hdi_flush_nasid_list.widget_p = - kmalloc((HUB_WIDGET_ID_MAX + 1) * - sizeof(struct sn_flush_device_kernel *), - GFP_KERNEL); - memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0, - (HUB_WIDGET_ID_MAX + 1) * - sizeof(struct sn_flush_device_kernel *)); + kzalloc(size, GFP_KERNEL); + if (!hubdev->hdi_flush_nasid_list.widget_p) + BUG(); for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { - sn_flush_device_kernel = kmalloc(DEV_PER_WIDGET * - sizeof(struct - sn_flush_device_kernel), - GFP_KERNEL); + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); if (!sn_flush_device_kernel) BUG(); - memset(sn_flush_device_kernel, 0x0, - DEV_PER_WIDGET * - sizeof(struct sn_flush_device_kernel)); dev_entry = sn_flush_device_kernel; for (device = 0; device < DEV_PER_WIDGET; device++,dev_entry++) { - dev_entry->common = kmalloc(sizeof(struct - sn_flush_device_common), - GFP_KERNEL); + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); if (!dev_entry->common) BUG(); - memset(dev_entry->common, 0x0, sizeof(struct - sn_flush_device_common)); if (sn_prom_feature_available( PRF_DEVICE_FLUSH_LIST)) status = sal_get_device_dmaflush_list( - nasid, - widget, - device, - (u64)(dev_entry->common)); + nasid, widget, device, + (u64)(dev_entry->common)); else status = sn_device_fixup_war(nasid, - widget, - device, - dev_entry->common); + widget, device, + dev_entry->common); if (status != SALRET_OK) panic("SAL call failed: %s\n", ia64_sal_strerror(status)); @@ -383,13 +372,12 @@ void sn_pci_fixup_slot(struct pci_dev *dev) pci_dev_get(dev); /* for the sysdata pointer */ pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); - if (pcidev_info <= 0) + if (!pcidev_info) BUG(); /* Cannot afford to run out of memory */ - sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL); - if (sn_irq_info <= 0) + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!sn_irq_info) BUG(); /* Cannot afford to run out of memory */ - memset(sn_irq_info, 0, sizeof(struct sn_irq_info)); /* Call to retrieve pci device information needed by kernel. */ status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, @@ -482,13 +470,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev) */ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) { - int status = 0; + int status; int nasid, cnode; struct pci_controller *controller; struct sn_pci_controller *sn_controller; struct pcibus_bussoft *prom_bussoft_ptr; struct hubdev_info *hubdev_info; - void *provider_soft = NULL; + void *provider_soft; struct sn_pcibus_provider *provider; status = sal_get_pcibus_info((u64) segment, (u64) busnum, @@ -535,6 +523,8 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) bus->sysdata = controller; if (provider->bus_fixup) provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller); + else + provider_soft = NULL; if (provider_soft == NULL) { /* fixup failed or not applicable */ @@ -638,13 +628,8 @@ void sn_bus_free_sysdata(void) static int __init sn_pci_init(void) { - int i = 0; - int j = 0; + int i, j; struct pci_dev *pci_dev = NULL; - extern void sn_init_cpei_timer(void); -#ifdef CONFIG_PROC_FS - extern void register_sn_procfs(void); -#endif if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) return 0; @@ -700,32 +685,29 @@ static int __init sn_pci_init(void) */ void hubdev_init_node(nodepda_t * npda, cnodeid_t node) { - struct hubdev_info *hubdev_info; + int size; + pg_data_t *pg; + + size = sizeof(struct hubdev_info); if (node >= num_online_nodes()) /* Headless/memless IO nodes */ - hubdev_info = - (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0), - sizeof(struct - hubdev_info)); + pg = NODE_DATA(0); else - hubdev_info = - (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node), - sizeof(struct - hubdev_info)); - npda->pdinfo = (void *)hubdev_info; + pg = NODE_DATA(node); + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; } geoid_t cnodeid_get_geoid(cnodeid_t cnode) { - struct hubdev_info *hubdev; hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); return hubdev->hdi_geoid; - } subsys_initcall(sn_pci_init); diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c index 81c63b2f8ae9..6ae276d5d50c 100644 --- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc. All Rights Reserved. * * Module to export the system's Firmware Interface Tables, including * PROM revision numbers and banners, in /proc @@ -190,7 +190,7 @@ static int read_version_entry(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = 0; + int len; /* data holds the NASID of the node */ len = dump_version(page, (unsigned long)data); @@ -202,7 +202,7 @@ static int read_fit_entry(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = 0; + int len; /* data holds the NASID of the node */ len = dump_fit(page, (unsigned long)data); @@ -229,13 +229,16 @@ int __init prominfo_init(void) struct proc_dir_entry *p; cnodeid_t cnodeid; unsigned long nasid; + int size; char name[NODE_NAME_LEN]; if (!ia64_platform_is("sn2")) return 0; - proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *), - GFP_KERNEL); + size = num_online_nodes() * sizeof(struct proc_dir_entry *); + proc_entries = kzalloc(size, GFP_KERNEL); + if (!proc_entries) + return -ENOMEM; sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL); @@ -244,14 +247,12 @@ int __init prominfo_init(void) sprintf(name, "node%d", cnodeid); *entp = proc_mkdir(name, sgi_prominfo_entry); nasid = cnodeid_to_nasid(cnodeid); - p = create_proc_read_entry( - "fit", 0, *entp, read_fit_entry, - (void *)nasid); + p = create_proc_read_entry("fit", 0, *entp, read_fit_entry, + (void *)nasid); if (p) p->owner = THIS_MODULE; - p = create_proc_read_entry( - "version", 0, *entp, read_version_entry, - (void *)nasid); + p = create_proc_read_entry("version", 0, *entp, + read_version_entry, (void *)nasid); if (p) p->owner = THIS_MODULE; entp++; @@ -263,7 +264,7 @@ int __init prominfo_init(void) void __exit prominfo_exit(void) { struct proc_dir_entry **entp; - unsigned cnodeid; + unsigned int cnodeid; char name[NODE_NAME_LEN]; entp = proc_entries; diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index f153a4c35c70..24eefb2fc55f 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -46,8 +46,14 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); -void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned long *, unsigned long, - volatile unsigned long *, unsigned long); +extern unsigned long +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); +void +sn2_ptc_deadlock_recovery(short *, short, short, int, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); /* * Note: some is the following is captured here to make degugging easier @@ -59,16 +65,6 @@ void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned lon #define reset_max_active_on_deadlock() 1 #define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock) -static inline void ptc_lock(int sh1, unsigned long *flagp) -{ - spin_lock_irqsave(PTC_LOCK(sh1), *flagp); -} - -static inline void ptc_unlock(int sh1, unsigned long flags) -{ - spin_unlock_irqrestore(PTC_LOCK(sh1), flags); -} - struct ptc_stats { unsigned long ptc_l; unsigned long change_rid; @@ -82,6 +78,8 @@ struct ptc_stats { unsigned long shub_ptc_flushes_not_my_mm; }; +#define sn2_ptctest 0 + static inline unsigned long wait_piowc(void) { volatile unsigned long *piows; @@ -200,7 +198,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, max_active = max_active_pio(shub1); itc = ia64_get_itc(); - ptc_lock(shub1, &flags); + spin_lock_irqsave(PTC_LOCK(shub1), flags); itc2 = ia64_get_itc(); __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; @@ -258,7 +256,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, ia64_srlz_d(); } - ptc_unlock(shub1, flags); + spin_unlock_irqrestore(PTC_LOCK(shub1), flags); preempt_enable(); } @@ -270,11 +268,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, * TLB flush transaction. The recovery sequence is somewhat tricky & is * coded in assembly language. */ -void sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, volatile unsigned long *ptc0, unsigned long data0, - volatile unsigned long *ptc1, unsigned long data1) + +void +sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, + volatile unsigned long *ptc0, unsigned long data0, + volatile unsigned long *ptc1, unsigned long data1) { - extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, - volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); short nasid, i; unsigned long *piows, zeroval, n; diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index a06719d752a0..c686d9c12f7b 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -6,11 +6,11 @@ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include -#include #ifdef CONFIG_PROC_FS #include #include +#include #include static int partition_id_show(struct seq_file *s, void *p) @@ -90,10 +90,10 @@ static int coherence_id_open(struct inode *inode, struct file *file) return single_open(file, coherence_id_show, NULL); } -static struct proc_dir_entry *sn_procfs_create_entry( - const char *name, struct proc_dir_entry *parent, - int (*openfunc)(struct inode *, struct file *), - int (*releasefunc)(struct inode *, struct file *)) +static struct proc_dir_entry +*sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent, + int (*openfunc)(struct inode *, struct file *), + int (*releasefunc)(struct inode *, struct file *)) { struct proc_dir_entry *e = create_proc_entry(name, 0444, parent); @@ -126,24 +126,24 @@ void register_sn_procfs(void) return; sn_procfs_create_entry("partition_id", sgi_proc_dir, - partition_id_open, single_release); + partition_id_open, single_release); sn_procfs_create_entry("system_serial_number", sgi_proc_dir, - system_serial_number_open, single_release); + system_serial_number_open, single_release); sn_procfs_create_entry("licenseID", sgi_proc_dir, - licenseID_open, single_release); + licenseID_open, single_release); e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, - sn_force_interrupt_open, single_release); + sn_force_interrupt_open, single_release); if (e) e->proc_fops->write = sn_force_interrupt_write_proc; sn_procfs_create_entry("coherence_id", sgi_proc_dir, - coherence_id_open, single_release); + coherence_id_open, single_release); sn_procfs_create_entry("sn_topology", sgi_proc_dir, - sn_topology_open, sn_topology_release); + sn_topology_open, sn_topology_release); } #endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index d263d3e8fbb9..8a56f8b5ffa2 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -284,12 +284,10 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq, if ((nasid & 1) == 0) return NULL; - sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL); + sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL); if (sn_irq_info == NULL) return NULL; - memset(sn_irq_info, 0x0, sn_irq_size); - status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq, req_nasid, slice); if (status) { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 5a36292388eb..b4b84c269210 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -335,10 +335,10 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, - pci_domain_nr(bus), bus->number, - 0, /* io */ - 0, /* read */ - port, size, __pa(val)); + pci_domain_nr(bus), bus->number, + 0, /* io */ + 0, /* read */ + port, size, __pa(val)); if (isrv.status == 0) return size; @@ -381,10 +381,10 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, - pci_domain_nr(bus), bus->number, - 0, /* io */ - 1, /* write */ - port, size, __pa(&val)); + pci_domain_nr(bus), bus->number, + 0, /* io */ + 1, /* write */ + port, size, __pa(&val)); if (isrv.status == 0) return size; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index aa3fa5152a32..1f0253bfe0a0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. */ #include @@ -12,7 +12,7 @@ #include #include -int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ +int pcibr_invalidate_ate; /* by default don't invalidate ATE on free */ /* * mark_ate: Mark the ate as either free or inuse. @@ -20,14 +20,12 @@ int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ static void mark_ate(struct ate_resource *ate_resource, int start, int number, u64 value) { - u64 *ate = ate_resource->ate; int index; int length = 0; for (index = start; length < number; index++, length++) ate[index] = value; - } /* @@ -37,7 +35,6 @@ static void mark_ate(struct ate_resource *ate_resource, int start, int number, static int find_free_ate(struct ate_resource *ate_resource, int start, int count) { - u64 *ate = ate_resource->ate; int index; int start_free; @@ -70,12 +67,10 @@ static int find_free_ate(struct ate_resource *ate_resource, int start, static inline void free_ate_resource(struct ate_resource *ate_resource, int start) { - mark_ate(ate_resource, start, ate_resource->ate[start], 0); if ((ate_resource->lowest_free_index > start) || (ate_resource->lowest_free_index < 0)) ate_resource->lowest_free_index = start; - } /* @@ -84,7 +79,6 @@ static inline void free_ate_resource(struct ate_resource *ate_resource, static inline int alloc_ate_resource(struct ate_resource *ate_resource, int ate_needed) { - int start_index; /* @@ -118,19 +112,12 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource, */ int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count) { - int status = 0; - u64 flag; + int status; + unsigned long flags; - flag = pcibr_lock(pcibus_info); + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count); - - if (status < 0) { - /* Failed to allocate */ - pcibr_unlock(pcibus_info, flag); - return -1; - } - - pcibr_unlock(pcibus_info, flag); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); return status; } @@ -182,7 +169,7 @@ void pcibr_ate_free(struct pcibus_info *pcibus_info, int index) ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V)); } - flags = pcibr_lock(pcibus_info); + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); free_ate_resource(&pcibus_info->pbi_int_ate_resource, index); - pcibr_unlock(pcibus_info, flags); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 54ce5b7ceed2..9f86bb6519aa 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -137,14 +137,12 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, pci_addr |= PCI64_ATTR_VIRTUAL; return pci_addr; - } static dma_addr_t pcibr_dmatrans_direct32(struct pcidev_info * info, u64 paddr, size_t req_size, u64 flags) { - struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> pdi_pcibus_info; @@ -171,7 +169,6 @@ pcibr_dmatrans_direct32(struct pcidev_info * info, } return PCI32_DIRECT_BASE | offset; - } /* @@ -218,9 +215,8 @@ void sn_dma_flush(u64 addr) u64 flags; u64 itte; struct hubdev_info *hubinfo; - volatile struct sn_flush_device_kernel *p; - volatile struct sn_flush_device_common *common; - + struct sn_flush_device_kernel *p; + struct sn_flush_device_common *common; struct sn_flush_nasid_entry *flush_nasid_list; if (!sn_ioif_inited) @@ -310,8 +306,7 @@ void sn_dma_flush(u64 addr) (common->sfdl_slot - 1)); } } else { - spin_lock_irqsave((spinlock_t *)&p->sfdl_flush_lock, - flags); + spin_lock_irqsave(&p->sfdl_flush_lock, flags); *common->sfdl_flush_addr = 0; /* force an interrupt. */ @@ -322,8 +317,7 @@ void sn_dma_flush(u64 addr) cpu_relax(); /* okay, everything is synched up. */ - spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, - flags); + spin_unlock_irqrestore(&p->sfdl_flush_lock, flags); } return; } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 2fac27049bf6..98f716bd92f0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -163,9 +163,12 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont /* Setup the PMU ATE map */ soft->pbi_int_ate_resource.lowest_free_index = 0; soft->pbi_int_ate_resource.ate = - kmalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL); - memset(soft->pbi_int_ate_resource.ate, 0, - (soft->pbi_int_ate_size * sizeof(u64))); + kzalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL); + + if (!soft->pbi_int_ate_resource.ate) { + kfree(soft); + return NULL; + } if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) { /* TIO PCI Bridge: find nearest node with CPUs */ diff --git a/include/asm-ia64/sn/bte.h b/include/asm-ia64/sn/bte.h index 01e5b4103235..5335d87ca5f8 100644 --- a/include/asm-ia64/sn/bte.h +++ b/include/asm-ia64/sn/bte.h @@ -46,7 +46,7 @@ #define BTES_PER_NODE (is_shub2() ? 4 : 2) #define MAX_BTES_PER_NODE 4 -#define BTE2OFF_CTRL (0) +#define BTE2OFF_CTRL 0 #define BTE2OFF_SRC (SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0) #define BTE2OFF_DEST (SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0) #define BTE2OFF_NOTIFY (SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0) @@ -75,11 +75,11 @@ : base + (BTEOFF_NOTIFY/8)) /* Define hardware modes */ -#define BTE_NOTIFY (IBCT_NOTIFY) +#define BTE_NOTIFY IBCT_NOTIFY #define BTE_NORMAL BTE_NOTIFY #define BTE_ZERO_FILL (BTE_NOTIFY | IBCT_ZFIL_MODE) /* Use a reserved bit to let the caller specify a wait for any BTE */ -#define BTE_WACQUIRE (0x4000) +#define BTE_WACQUIRE 0x4000 /* Use the BTE on the node with the destination memory */ #define BTE_USE_DEST (BTE_WACQUIRE << 1) /* Use any available BTE interface on any node for the transfer */ diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index 9334078b089a..a601d3af39b6 100644 --- a/include/asm-ia64/sn/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H #define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H @@ -115,18 +115,6 @@ struct pcibus_info { spinlock_t pbi_lock; }; -/* - * pcibus_info structure locking macros - */ -inline static unsigned long -pcibr_lock(struct pcibus_info *pcibus_info) -{ - unsigned long flag; - spin_lock_irqsave(&pcibus_info->pbi_lock, flag); - return(flag); -} -#define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag) - extern int pcibr_init_provider(void); extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *); extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t); diff --git a/include/asm-ia64/sn/sn_feature_sets.h b/include/asm-ia64/sn/sn_feature_sets.h index 9ca642cad338..ff33e3bd3f8e 100644 --- a/include/asm-ia64/sn/sn_feature_sets.h +++ b/include/asm-ia64/sn/sn_feature_sets.h @@ -12,9 +12,6 @@ */ -#include -#include - /* --------------------- PROM Features -----------------------------*/ extern int sn_prom_feature_available(int id); -- cgit v1.3-6-gb490 From d3454344b3507042e5d561d0cfed19e99cf2fc88 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 13 Feb 2006 05:32:09 -0500 Subject: [IA64] remove obsolete corporate address Remove obsolete SGI address Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/timer_interrupt.c | 7 +------ drivers/ide/pci/sgiioc4.c | 5 ----- include/asm-ia64/machvec_sn2.h | 7 +------ 3 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c index adf5db2e2afe..fa7f69945917 100644 --- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -1,7 +1,7 @@ /* * * - * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2005, 2006 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -22,11 +22,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index 2b286e865163..43b96e298363 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -13,11 +13,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index e1b6cd63f49e..03d00faf03b5 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -20,11 +20,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan -- cgit v1.3-6-gb490 From c2a4969ba14e852bf4ee92c7db3b0cf82405a0c9 Mon Sep 17 00:00:00 2001 From: Dean Roe Date: Tue, 14 Feb 2006 15:01:23 -0600 Subject: [IA64-SGI] fix the size of __sn_cnodeid_to_nasid The __sn_cnodeid_to_nasid array was incorrectly sized at MAX_NUMNODES. On a large system, this array could overflow. The following patch corrects this by defining it to MAX_COMPACT_NODES. Signed-off-by: Dean Roe Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 2 +- include/asm-ia64/sn/arch.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 1672ecad77e2..5b84836c2171 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); -DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index 1a3831c04af6..91c31be87b13 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); * Compact node ID to nasid mappings kept in the per-cpu data areas of each * cpu. */ -DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); #define sn_cnodeid_to_nasid (&__get_cpu_var(__sn_cnodeid_to_nasid[0])) -- cgit v1.3-6-gb490 From 4c2cd96696ae0896ce4bcf725b9f0eaffafeb640 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Wed, 15 Feb 2006 08:02:21 -0600 Subject: [IA64-SGI] enforce proper ordering of callouts by XPC Fix XPC so that it does not deliver any messages until the connected callout has returned, as well as, prevent the disconnected callout to occur before the disconnecting callout has returned. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc_channel.c | 8 +++++--- arch/ia64/sn/kernel/xpc_main.c | 20 +++++++++++++------- include/asm-ia64/sn/xpc.h | 31 ++++++++++++++++++------------- 3 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index 36e5437a0fb6..cdf6856ce089 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -738,7 +738,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* make sure all activity has settled down first */ - if (atomic_read(&ch->references) > 0) { + if (atomic_read(&ch->references) > 0 || + ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) { return; } DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); @@ -775,7 +777,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* both sides are disconnected now */ - if (ch->flags & XPC_C_CONNECTCALLOUT) { + if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { spin_unlock_irqrestore(&ch->lock, *irq_flags); xpc_disconnect_callout(ch, xpcDisconnected); spin_lock_irqsave(&ch->lock, *irq_flags); @@ -1300,7 +1302,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) "delivered=%d, partid=%d, channel=%d\n", nmsgs_sent, ch->partid, ch->number); - if (ch->flags & XPC_C_CONNECTCALLOUT) { + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { xpc_activate_kthreads(ch, nmsgs_sent); } } diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 9cd460dfe27e..8cbf16432570 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -750,12 +750,16 @@ xpc_daemonize_kthread(void *args) /* let registerer know that connection has been established */ spin_lock_irqsave(&ch->lock, irq_flags); - if (!(ch->flags & XPC_C_CONNECTCALLOUT)) { - ch->flags |= XPC_C_CONNECTCALLOUT; + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_connected_callout(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + /* * It is possible that while the callout was being * made that the remote partition sent some messages. @@ -777,15 +781,17 @@ xpc_daemonize_kthread(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0) { spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTCALLOUT) && - !(ch->flags & XPC_C_DISCONNECTCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTCALLOUT; + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_disconnect_callout(ch, xpcDisconnecting); - } else { - spin_unlock_irqrestore(&ch->lock, irq_flags); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; } + spin_unlock_irqrestore(&ch->lock, irq_flags); if (atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); xpc_IPI_send_disengage(part); diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h index 0c36928ffd8b..df7f5f4f3cde 100644 --- a/include/asm-ia64/sn/xpc.h +++ b/include/asm-ia64/sn/xpc.h @@ -508,19 +508,24 @@ struct xpc_channel { #define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ #define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */ -#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */ - -#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */ - -#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ -#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */ -#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */ +#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */ +#define XPC_C_CONNECTEDCALLOUT_MADE \ + 0x00000080 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTINGCALLOUT \ + 0x00010000 /* disconnecting callout initiated */ +#define XPC_C_DISCONNECTINGCALLOUT_MADE \ + 0x00020000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ -- cgit v1.3-6-gb490 From defbb2c929cbe89dc92239b303cd33d3c85e9a83 Mon Sep 17 00:00:00 2001 From: "hawkes@sgi.com" Date: Tue, 14 Feb 2006 10:40:17 -0800 Subject: [IA64] ia64: simplify and fix udelay() The original ia64 udelay() was simple, but flawed for platforms without synchronized ITCs: a preemption and migration to another CPU during the while-loop likely resulted in too-early termination or very, very lengthy looping. The first fix (now in 2.6.15) broke the delay loop into smaller, non-preemptible chunks, reenabling preemption between the chunks. This fix is flawed in that the total udelay is computed to be the sum of just the non-premptible while-loop pieces, i.e., not counting the time spent in the interim preemptible periods. If an interrupt or a migration occurs during one of these interim periods, then that time is invisible and only serves to lengthen the effective udelay(). This new fix backs out the current flawed fix and returns to a simple udelay(), fully preemptible and interruptible. It implements two simple alternative udelay() routines: one a default generic version that uses ia64_get_itc(), and the other an sn-specific version that uses that platform's RTC. Signed-off-by: John Hawkes Signed-off-by: Tony Luck --- arch/ia64/kernel/time.c | 39 +++++++++++++++++---------------------- arch/ia64/sn/kernel/sn2/timer.c | 19 +++++++++++++++++++ include/asm-ia64/timex.h | 2 ++ 3 files changed, 38 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index a094ec49ccfa..307d01e15b2e 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -250,32 +250,27 @@ time_init (void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); } -#define SMALLUSECS 100 - -void -udelay (unsigned long usecs) +/* + * Generic udelay assumes that if preemption is allowed and the thread + * migrates to another CPU, that the ITC values are synchronized across + * all CPUs. + */ +static void +ia64_itc_udelay (unsigned long usecs) { - unsigned long start; - unsigned long cycles; - unsigned long smallusecs; + unsigned long start = ia64_get_itc(); + unsigned long end = start + usecs*local_cpu_data->cyc_per_usec; - /* - * Execute the non-preemptible delay loop (because the ITC might - * not be synchronized between CPUS) in relatively short time - * chunks, allowing preemption between the chunks. - */ - while (usecs > 0) { - smallusecs = (usecs > SMALLUSECS) ? SMALLUSECS : usecs; - preempt_disable(); - cycles = smallusecs*local_cpu_data->cyc_per_usec; - start = ia64_get_itc(); + while (time_before(ia64_get_itc(), end)) + cpu_relax(); +} - while (ia64_get_itc() - start < cycles) - cpu_relax(); +void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay; - preempt_enable(); - usecs -= smallusecs; - } +void +udelay (unsigned long usecs) +{ + (*ia64_udelay)(usecs); } EXPORT_SYMBOL(udelay); diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index deb9baf4d473..56a88b6df4b4 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -28,9 +29,27 @@ static struct time_interpolator sn2_interpolator = { .source = TIME_SOURCE_MMIO64 }; +/* + * sn udelay uses the RTC instead of the ITC because the ITC is not + * synchronized across all CPUs, and the thread may migrate to another CPU + * if preemption is enabled. + */ +static void +ia64_sn_udelay (unsigned long usecs) +{ + unsigned long start = rtc_time(); + unsigned long end = start + + usecs * sn_rtc_cycles_per_second / 1000000; + + while (time_before((unsigned long)rtc_time(), end)) + cpu_relax(); +} + void __init sn_timer_init(void) { sn2_interpolator.frequency = sn_rtc_cycles_per_second; sn2_interpolator.addr = RTC_COUNTER_ADDR; register_time_interpolator(&sn2_interpolator); + + ia64_udelay = &ia64_sn_udelay; } diff --git a/include/asm-ia64/timex.h b/include/asm-ia64/timex.h index 414aae060440..05a6baf8a472 100644 --- a/include/asm-ia64/timex.h +++ b/include/asm-ia64/timex.h @@ -15,6 +15,8 @@ typedef unsigned long cycles_t; +extern void (*ia64_udelay)(unsigned long usecs); + /* * For performance reasons, we don't want to define CLOCK_TICK_TRATE as * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George -- cgit v1.3-6-gb490 From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:10:22 -0800 Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish When a packet matching an IPsec policy is SNATed so it doesn't match any policy anymore it looses its xfrm bundle, which makes xfrm4_output_finish crash because of a NULL pointer dereference. This patch directs these packets to the original output path instead. Since the packets have already passed the POST_ROUTING hook, but need to start at the beginning of the original output path which includes another POST_ROUTING invocation, a flag is added to the IPCB to indicate that the packet was rerouted and doesn't need to pass the POST_ROUTING hook again. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 19 +++++++++++++++---- include/net/ip.h | 1 + include/net/xfrm.h | 1 - net/ipv4/ip_gre.c | 3 ++- net/ipv4/ip_output.c | 16 ++++++++++------ net/ipv4/ipip.c | 3 ++- net/ipv4/xfrm4_output.c | 13 ++++++++++--- 7 files changed, 40 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4cf6088625c1..3ca3d9ee78a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -184,8 +184,11 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { + if (!cond) + return 1; #ifndef CONFIG_NETFILTER_DEBUG if (list_empty(&nf_hooks[pf][hook])) return 1; @@ -197,7 +200,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -224,7 +227,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\ + __ret = (okfn)(skb); \ +__ret;}) + +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ +({int __ret; \ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ __ret = (okfn)(skb); \ __ret;}) @@ -295,11 +304,13 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { return okfn(*pskb); } diff --git a/include/net/ip.h b/include/net/ip.h index 8de0697b364c..fab3d5b3ab1c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -41,6 +41,7 @@ struct inet_skb_parm #define IPSKB_XFRM_TUNNEL_SIZE 2 #define IPSKB_XFRM_TRANSFORMED 4 #define IPSKB_FRAG_COMPLETE 8 +#define IPSKB_REROUTED 16 }; struct ipcm_cookie diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d09ca0e7d139..d6111a2f0a23 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -866,7 +866,6 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); -extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index abe23923e4e7..9981dcd68f11 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -830,7 +830,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3324fbfe528a..57d290d89ec2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -207,8 +207,10 @@ static inline int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ - if (skb->dst->xfrm != NULL) - return xfrm4_output_finish(skb); + if (skb->dst->xfrm != NULL) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } #endif if (skb->len > dst_mtu(skb->dst) && !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) @@ -271,8 +273,9 @@ int ip_mc_output(struct sk_buff *skb) newskb->dev, ip_dev_loopback_xmit); } - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_output(struct sk_buff *skb) @@ -284,8 +287,9 @@ int ip_output(struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_queue_xmit(struct sk_buff *skb, int ipfragok) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e5cbe72c6b80..03d13742a4b8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -622,7 +622,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d4df0ddd424b..32ad229b4fed 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -152,10 +152,16 @@ error_nolock: goto out_exit; } -int xfrm4_output_finish(struct sk_buff *skb) +static int xfrm4_output_finish(struct sk_buff *skb) { int err; +#ifdef CONFIG_NETFILTER + if (!skb->dst->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); @@ -178,6 +184,7 @@ int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, - xfrm4_output_finish); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } -- cgit v1.3-6-gb490 From 9c92d3486434e7310cb288587953e2dae4a79701 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:18:19 -0800 Subject: [NETFILTER]: Don't invoke okfn in CONFIG_NETFILTER=n variant of nf_hook() nf_hook() is supposed to call the netfilter hook and return control of the packet back to the caller in case it may pass, the okfn is only used for queueing. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3ca3d9ee78a9..468896939843 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -318,7 +318,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return okfn(*pskb); + return 1; } static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} struct flowi; -- cgit v1.3-6-gb490 From 9f672004ab1a8094bec1785b39ac683ab9eebebc Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Wed, 15 Feb 2006 15:17:34 -0800 Subject: [PATCH] neofb: avoid resetting display config on unblank (v2) There were two mistakes in the register-read-on-(un)blank approach. - First, without proper register (un)locking the value read back will always be zero, and this is what I missed entirely until just now. Due to this, the logic could not be verified at all and I tried some bogus checks which are completely stupid. - Second, the LCD status bit will always be set to zero when the backlight has been turned off. Reading the value back during unblank will disable the LCD unconditionally, regardless of the state it is supposed to be in, since we set it to zero beforehand. So this is what we do now: - create a new variable in struct neofb_par, and use that to determine whether to read back registers (initialized to true) - before actually blanking the screen, read back the register to sense any possible change made through Fn key combo - use proper neoUnlock() / neoLock() to actually read something - every call to neofb_blank() determines if we read back next time: blanking disables readback, unblanking (FB_BLANK_UNBLANK) enables it This should give us a nice and clean state machine. Has been thoroughly tested on a Dell Latitude CPiA / NM220 Chip docked to a C/Dock2 with attached CRT in all possible combinations of LCD/CRT on/off. I changed the config via Fn key, let the console blank, unblanked by keypress - works flawlessly. Signed-off-by: Christian Trefzer Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/neofb.c | 15 ++++++++++++--- include/video/neomagic.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c index b85e2b180a44..a2e201dc40f7 100644 --- a/drivers/video/neofb.c +++ b/drivers/video/neofb.c @@ -843,6 +843,9 @@ static int neofb_set_par(struct fb_info *info) par->SysIfaceCntl2 = 0xc0; /* VESA Bios sets this to 0x80! */ + /* Initialize: by default, we want display config register to be read */ + par->PanelDispCntlRegRead = 1; + /* Enable any user specified display devices. */ par->PanelDispCntlReg1 = 0x00; if (par->internal_display) @@ -1334,11 +1337,17 @@ static int neofb_blank(int blank_mode, struct fb_info *info) struct neofb_par *par = info->par; int seqflags, lcdflags, dpmsflags, reg; + /* - * Reload the value stored in the register, might have been changed via - * FN keystroke + * Reload the value stored in the register, if sensible. It might have + * been changed via FN keystroke. */ - par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03; + if (par->PanelDispCntlRegRead) { + neoUnlock(); + par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03; + neoLock(&par->state); + } + par->PanelDispCntlRegRead = !blank_mode; switch (blank_mode) { case FB_BLANK_POWERDOWN: /* powerdown - both sync lines down */ diff --git a/include/video/neomagic.h b/include/video/neomagic.h index 1d69049bd4c1..78b1f15a538f 100644 --- a/include/video/neomagic.h +++ b/include/video/neomagic.h @@ -159,6 +159,7 @@ struct neofb_par { unsigned char PanelDispCntlReg1; unsigned char PanelDispCntlReg2; unsigned char PanelDispCntlReg3; + unsigned char PanelDispCntlRegRead; unsigned char PanelVertCenterReg1; unsigned char PanelVertCenterReg2; unsigned char PanelVertCenterReg3; -- cgit v1.3-6-gb490 From 5f6164f3092832e0d9b12eed52e09a76bf39c64a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Feb 2006 15:17:39 -0800 Subject: [PATCH] add asm-generic/mman.h Make new MADV_REMOVE, MADV_DONTFORK, MADV_DOFORK consistent across all arches. The idea is to make it possible to use them portably even before distros include them in libc headers. Move common flags to asm-generic/mman.h Signed-off-by: Michael S. Tsirkin Cc: Roland Dreier Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/mman.h | 8 +++++--- include/asm-arm/mman.h | 31 +------------------------------ include/asm-arm26/mman.h | 31 +------------------------------ include/asm-cris/mman.h | 31 +------------------------------ include/asm-frv/mman.h | 31 +------------------------------ include/asm-generic/mman.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/asm-h8300/mman.h | 31 +------------------------------ include/asm-i386/mman.h | 31 +------------------------------ include/asm-ia64/mman.h | 31 +------------------------------ include/asm-m32r/mman.h | 33 ++------------------------------- include/asm-m68k/mman.h | 31 +------------------------------ include/asm-mips/mman.h | 22 ++++++++++++---------- include/asm-parisc/mman.h | 8 +++++--- include/asm-powerpc/mman.h | 32 ++------------------------------ include/asm-s390/mman.h | 31 +------------------------------ include/asm-sh/mman.h | 31 +------------------------------ include/asm-sparc/mman.h | 31 ++----------------------------- include/asm-sparc64/mman.h | 31 ++----------------------------- include/asm-v850/mman.h | 30 +----------------------------- include/asm-x86_64/mman.h | 30 +----------------------------- include/asm-xtensa/mman.h | 22 ++++++++++++---------- 21 files changed, 96 insertions(+), 503 deletions(-) create mode 100644 include/asm-generic/mman.h (limited to 'include') diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h index a21515c16a43..5f24c755f577 100644 --- a/include/asm-alpha/mman.h +++ b/include/asm-alpha/mman.h @@ -42,9 +42,11 @@ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages */ -#define MADV_REMOVE 7 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h index 693ed859e632..54570d2e95b7 100644 --- a/include/asm-arm/mman.h +++ b/include/asm-arm/mman.h @@ -1,19 +1,7 @@ #ifndef __ARM_MMAN_H__ #define __ARM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ARM_MMAN_H__ */ diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h index 2096c50df888..4000a6c1b76b 100644 --- a/include/asm-arm26/mman.h +++ b/include/asm-arm26/mman.h @@ -1,19 +1,7 @@ #ifndef __ARM_MMAN_H__ #define __ARM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ARM_MMAN_H__ */ diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h index deddfb239ff5..1c35e1b66b46 100644 --- a/include/asm-cris/mman.h +++ b/include/asm-cris/mman.h @@ -3,19 +3,7 @@ /* verbatim copy of asm-i386/ version */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -25,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __CRIS_MMAN_H__ */ diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h index d3bca306da82..b4371e928683 100644 --- a/include/asm-frv/mman.h +++ b/include/asm-frv/mman.h @@ -1,19 +1,7 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,25 +11,8 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ASM_MMAN_H__ */ diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h new file mode 100644 index 000000000000..3b41d2bb70da --- /dev/null +++ b/include/asm-generic/mman.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_MMAN_H +#define _ASM_GENERIC_MMAN_H + +/* + Author: Michael S. Tsirkin , Mellanox Technologies Ltd. + Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +/* compatibility flags */ +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 + +#endif diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h index ac0346f7d11d..b9f104f22a36 100644 --- a/include/asm-h8300/mman.h +++ b/include/asm-h8300/mman.h @@ -1,19 +1,7 @@ #ifndef __H8300_MMAN_H__ #define __H8300_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __H8300_MMAN_H__ */ diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h index ab2339a1d807..8fd9d7ab7faf 100644 --- a/include/asm-i386/mman.h +++ b/include/asm-i386/mman.h @@ -1,19 +1,7 @@ #ifndef __I386_MMAN_H__ #define __I386_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __I386_MMAN_H__ */ diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h index 357ebb780cc0..6ba179f12718 100644 --- a/include/asm-ia64/mman.h +++ b/include/asm-ia64/mman.h @@ -8,19 +8,7 @@ * David Mosberger-Tang , Hewlett-Packard Co */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x00100 /* stack-like segment */ #define MAP_GROWSUP 0x00200 /* register stack-like segment */ @@ -31,24 +19,7 @@ #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* _ASM_IA64_MMAN_H */ diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h index 6b02fe3fcff2..695a860c024f 100644 --- a/include/asm-m32r/mman.h +++ b/include/asm-m32r/mman.h @@ -1,21 +1,9 @@ #ifndef __M32R_MMAN_H__ #define __M32R_MMAN_H__ -/* orig : i386 2.6.0-test6 */ - -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +#include -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +/* orig : i386 2.6.0-test6 */ #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -25,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __M32R_MMAN_H__ */ diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h index efd12bc4ccb7..1626d37f4898 100644 --- a/include/asm-m68k/mman.h +++ b/include/asm-m68k/mman.h @@ -1,19 +1,7 @@ #ifndef __M68K_MMAN_H__ #define __M68K_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __M68K_MMAN_H__ */ diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h index 6d01e26830fa..046cf686bee7 100644 --- a/include/asm-mips/mman.h +++ b/include/asm-mips/mman.h @@ -60,17 +60,19 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 #endif /* _ASM_MMAN_H */ diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h index a381cf5c8f55..0ef15ee0f17e 100644 --- a/include/asm-parisc/mman.h +++ b/include/asm-parisc/mman.h @@ -38,7 +38,11 @@ #define MADV_SPACEAVAIL 5 /* insure that resources are reserved */ #define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */ #define MADV_VPS_INHERIT 7 /* Inherit parents page size */ -#define MADV_REMOVE 8 /* remove these pages & resources */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* The range 12-64 is reserved for page size specification. */ #define MADV_4K_PAGES 12 /* Use 4K pages */ @@ -49,8 +53,6 @@ #define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */ #define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ #define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h index fcff25d13f13..24cf664a8295 100644 --- a/include/asm-powerpc/mman.h +++ b/include/asm-powerpc/mman.h @@ -1,6 +1,8 @@ #ifndef _ASM_POWERPC_MMAN_H #define _ASM_POWERPC_MMAN_H +#include + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -8,19 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_LOCKED 0x80 @@ -29,27 +18,10 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* _ASM_POWERPC_MMAN_H */ diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h index d41ca1477010..7839767d837e 100644 --- a/include/asm-s390/mman.h +++ b/include/asm-s390/mman.h @@ -9,19 +9,7 @@ #ifndef __S390_MMAN_H__ #define __S390_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -31,24 +19,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __S390_MMAN_H__ */ diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h index 0e08d0573abc..156eb0225cf6 100644 --- a/include/asm-sh/mman.h +++ b/include/asm-sh/mman.h @@ -1,19 +1,7 @@ #ifndef __ASM_SH_MMAN_H #define __ASM_SH_MMAN_H -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ASM_SH_MMAN_H */ diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h index 4a298b2be859..88d1886abf3b 100644 --- a/include/asm-sparc/mman.h +++ b/include/asm-sparc/mman.h @@ -2,21 +2,10 @@ #ifndef __SPARC_MMAN_H__ #define __SPARC_MMAN_H__ -/* SunOS'ified... */ +#include -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +/* SunOS'ified... */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_INHERIT 0x80 /* SunOS doesn't do this, but... */ @@ -27,10 +16,6 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ @@ -48,18 +33,6 @@ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ -#define MADV_REMOVE 0x6 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 #endif /* __SPARC_MMAN_H__ */ diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h index d705ec92da8b..6fd878e61435 100644 --- a/include/asm-sparc64/mman.h +++ b/include/asm-sparc64/mman.h @@ -2,21 +2,10 @@ #ifndef __SPARC64_MMAN_H__ #define __SPARC64_MMAN_H__ -/* SunOS'ified... */ +#include -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +/* SunOS'ified... */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_INHERIT 0x80 /* SunOS doesn't do this, but... */ @@ -27,10 +16,6 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ @@ -48,18 +33,6 @@ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ -#define MADV_REMOVE 0x6 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 #endif /* __SPARC64_MMAN_H__ */ diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h index 7b851c310e41..edbf6edbfb37 100644 --- a/include/asm-v850/mman.h +++ b/include/asm-v850/mman.h @@ -1,18 +1,7 @@ #ifndef __V850_MMAN_H__ #define __V850_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -20,24 +9,7 @@ #define MAP_LOCKED 0x2000 /* pages are locked */ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __V850_MMAN_H__ */ diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h index b699a38c1c3c..dd5cb0534d37 100644 --- a/include/asm-x86_64/mman.h +++ b/include/asm-x86_64/mman.h @@ -1,19 +1,8 @@ #ifndef __X8664_MMAN_H__ #define __X8664_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_SEM 0x8 -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +#include -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ @@ -24,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h index e2d7afb679c8..ba394cbb4807 100644 --- a/include/asm-xtensa/mman.h +++ b/include/asm-xtensa/mman.h @@ -67,17 +67,19 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 #endif /* _XTENSA_MMAN_H */ -- cgit v1.3-6-gb490 From b2ee9dbfad14ba8e34a589d552ddc67300a26bec Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 15 Feb 2006 15:17:40 -0800 Subject: [PATCH] hrtimer: fix multiple macro argument expansion For two macros the arguments were expanded twice, change them to inline functions to avoid it. Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 6aca67a569a2..f3dec45ef874 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -96,10 +96,16 @@ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; }) /* convert a timespec to ktime_t format: */ -#define timespec_to_ktime(ts) ktime_set((ts).tv_sec, (ts).tv_nsec) +static inline ktime_t timespec_to_ktime(struct timespec ts) +{ + return ktime_set(ts.tv_sec, ts.tv_nsec); +} /* convert a timeval to ktime_t format: */ -#define timeval_to_ktime(tv) ktime_set((tv).tv_sec, (tv).tv_usec * 1000) +static inline ktime_t timeval_to_ktime(struct timeval tv) +{ + return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); +} /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) -- cgit v1.3-6-gb490 From 7bbb79403163e047c6e333ff169db34e3c969e65 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 16 Feb 2006 11:08:09 +0000 Subject: [ARM] Fix SMP initialisation oops A change to the SMP initialisation caused the following oops: CPU1: Booted secondary processor CPU1: D VIPT write-back cache CPU1: I cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets CPU1: D cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets <7>Calibrating delay loop... 83.14 BogoMIPS (lpj=415744) <1>Unable to handle kernel NULL pointer dereference at virtual address 0000001c ... PC is at enqueue_task+0x1c/0x64 LR is at activate_task+0xcc/0xe4 SMP initialisation now requires cpu_possible_map to be initialised in setup_arch(). Move this from smp_prepare_cpus() to smp_init_cpus() and call it from our setup_arch() if CONFIG_SMP is enabled. Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 1 - arch/arm/mach-integrator/platsmp.c | 21 +++++++++++++++------ arch/arm/mach-realview/platsmp.c | 21 +++++++++++++++------ include/asm-arm/smp.h | 5 +++++ 5 files changed, 40 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c45d10d07bde..68273b4dc882 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -771,6 +772,10 @@ void __init setup_arch(char **cmdline_p) paging_init(&meminfo, mdesc); request_standard_resources(&meminfo, mdesc); +#ifdef CONFIG_SMP + smp_init_cpus(); +#endif + cpu_init(); /* diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7338948bd7d3..02aa300c4633 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -338,7 +338,6 @@ void __init smp_prepare_boot_cpu(void) per_cpu(cpu_data, cpu).idle = current; - cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); cpu_set(cpu, cpu_online_map); } diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c index ea10bd8c972c..1bc8534ef0c6 100644 --- a/arch/arm/mach-integrator/platsmp.c +++ b/arch/arm/mach-integrator/platsmp.c @@ -140,6 +140,18 @@ static void __init poke_milo(void) mb(); } +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i, ncores = get_core_count(); + + for (i = 0; i < ncores; i++) + cpu_set(i, cpu_possible_map); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = get_core_count(); @@ -176,14 +188,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) max_cpus = ncores; /* - * Initialise the possible/present maps. - * cpu_possible_map describes the set of CPUs which may be present - * cpu_present_map describes the set of CPUs populated + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. */ - for (i = 0; i < max_cpus; i++) { - cpu_set(i, cpu_possible_map); + for (i = 0; i < max_cpus; i++) cpu_set(i, cpu_present_map); - } /* * Do we need any more CPUs? If so, then let them know where diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index a8fbd76d8be5..b8484e15dacb 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -143,6 +143,18 @@ static void __init poke_milo(void) mb(); } +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i, ncores = get_core_count(); + + for (i = 0; i < ncores; i++) + cpu_set(i, cpu_possible_map); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = get_core_count(); @@ -179,14 +191,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_timer_setup(cpu); /* - * Initialise the possible/present maps. - * cpu_possible_map describes the set of CPUs which may be present - * cpu_present_map describes the set of CPUs populated + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. */ - for (i = 0; i < max_cpus; i++) { - cpu_set(i, cpu_possible_map); + for (i = 0; i < max_cpus; i++) cpu_set(i, cpu_present_map); - } /* * Do we need any more CPUs? If so, then let them know where diff --git a/include/asm-arm/smp.h b/include/asm-arm/smp.h index 5a72e50ca9fc..fe45f7f61223 100644 --- a/include/asm-arm/smp.h +++ b/include/asm-arm/smp.h @@ -41,6 +41,11 @@ extern void show_ipi_list(struct seq_file *p); */ asmlinkage void do_IPI(struct pt_regs *regs); +/* + * Setup the SMP cpu_possible_map + */ +extern void smp_init_cpus(void); + /* * Move global data into per-processor storage. */ -- cgit v1.3-6-gb490 From d9db950cfa3d674ee834d980c329efdf8e4a0568 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 16 Feb 2006 22:36:15 +0000 Subject: [ARM] 3339/1: ARM EABI: make unmuxed syscalls visible Patch from Nicolas Pitre With EABI the multiplex sys_ipc and sys_socketcall syscalls are unavailable and their support code even removed from the compiled kernel, and the new unmuxed syscalls must be used instead. Make those syscall numbers visible. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- include/asm-arm/unistd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 77430d6178ae..8f331bbd39a8 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -309,7 +309,7 @@ #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) -#if 0 /* reserve these for un-muxing socketcall */ +#if defined(__ARM_EABI__) /* reserve these for un-muxing socketcall */ #define __NR_socket (__NR_SYSCALL_BASE+281) #define __NR_bind (__NR_SYSCALL_BASE+282) #define __NR_connect (__NR_SYSCALL_BASE+283) @@ -329,7 +329,7 @@ #define __NR_recvmsg (__NR_SYSCALL_BASE+297) #endif -#if 0 /* reserve these for un-muxing ipc */ +#if defined(__ARM_EABI__) /* reserve these for un-muxing ipc */ #define __NR_semop (__NR_SYSCALL_BASE+298) #define __NR_semget (__NR_SYSCALL_BASE+299) #define __NR_semctl (__NR_SYSCALL_BASE+300) @@ -347,7 +347,7 @@ #define __NR_request_key (__NR_SYSCALL_BASE+310) #define __NR_keyctl (__NR_SYSCALL_BASE+311) -#if 0 /* reserved for un-muxing ipc */ +#if defined(__ARM_EABI__) /* reserved for un-muxing ipc */ #define __NR_semtimedop (__NR_SYSCALL_BASE+312) #endif -- cgit v1.3-6-gb490 From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:41:58 +0100 Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and cleanup AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution installation it's easiest if it's a boot time option. Also I moved the variable to a more appropiate place and make it independent from sysctl And marked __read_mostly which it is. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ arch/i386/kernel/cpu/transmeta.c | 1 + include/linux/kernel.h | 6 ------ include/linux/mm.h | 2 ++ kernel/sysctl.c | 2 -- mm/memory.c | 10 ++++++++++ 6 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ac75b57edf2e..b874771385cd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1638,6 +1638,9 @@ running once the system is up. Format: ,,,,,[,[,[,]]] + norandmaps Don't use address space randomization + Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + ______________________________________________________________________ Changelog: diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index bdbeb77f4e22..7214c9b577ab 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b49affa0ac5a..3b507bf05d09 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -326,12 +326,6 @@ struct sysinfo { /* Force a compilation error if condition is true */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#ifdef CONFIG_SYSCTL -extern int randomize_va_space; -#else -#define randomize_va_space 1 -#endif - /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) diff --git a/include/linux/mm.h b/include/linux/mm.h index 75e9f0724997..26e1663a5cbe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1051,5 +1051,7 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask, void drop_pagecache(void); void drop_slab(void); +extern int randomize_va_space; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 71dd6f62efec..7654d55c47f5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -126,8 +126,6 @@ extern int sysctl_hz_timer; extern int acct_parm[]; #endif -int randomize_va_space = 1; - static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, ctl_table *, void **); static int proc_doutsstring(ctl_table *table, int write, struct file *filp, diff --git a/mm/memory.c b/mm/memory.c index 2bee1f21aa8a..9abc6008544b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -82,6 +82,16 @@ EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(vmalloc_earlyreserve); +int randomize_va_space __read_mostly = 1; + +static int __init disable_randmaps(char *s) +{ + randomize_va_space = 0; + return 0; +} +__setup("norandmaps", disable_randmaps); + + /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but -- cgit v1.3-6-gb490 From 7fd67843b96f90f59c9a244a1bc25137978a3ff9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:07 +0100 Subject: [PATCH] x86_64: Disable tsc when apicpmtimer is active Otherwise it has no effect anyways. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 1 + arch/x86_64/kernel/time.c | 3 +-- include/asm-x86_64/proto.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7a0a3e8d5d72..e5b14c57eaa0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -1152,6 +1152,7 @@ __setup("noapicmaintimer", setup_noapicmaintimer); static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; + notsc_setup(NULL); return setup_apicmaintimer(NULL); } __setup("apicpmtimer", setup_apicpmtimer); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3c58c30506a1..67841d11ed1f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1327,8 +1327,7 @@ static int __init nohpet_setup(char *s) __setup("nohpet", nohpet_setup); - -static int __init notsc_setup(char *s) +int __init notsc_setup(char *s) { notsc = 1; return 0; diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index c99832e7bf3f..eca3f2d633db 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -133,6 +133,7 @@ extern int fix_aperture; extern int force_iommu; extern int reboot_force; +extern int notsc_setup(char *); extern void smp_local_timer_interrupt(struct pt_regs * regs); -- cgit v1.3-6-gb490 From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 17 Feb 2006 10:30:23 +1100 Subject: [PATCH] Provide an interface for getting the current tick length This provides an interface for arch code to find out how many nanoseconds are going to be added on to xtime by the next call to do_timer. The value returned is a fixed-point number in 52.12 format in nanoseconds. The reason for this format is that it gives the full precision that the timekeeping code is using internally. The motivation for this is to fix a problem that has arisen on 32-bit powerpc in that the value returned by do_gettimeofday drifts apart from xtime if NTP is being used. PowerPC is now using a lockless do_gettimeofday based on reading the timebase register and performing some simple arithmetic. (This method of getting the time is also exported to userspace via the VDSO.) However, the factor and offset it uses were calculated based on the nominal tick length and weren't being adjusted when NTP varied the tick length. Note that 64-bit powerpc has had the lockless do_gettimeofday for a long time now. It also had an extremely hairy routine that got called from the 32-bit compat routine for adjtimex, which adjusted the factor and offset according to what it thought the timekeeping code was going to do. Not only was this only called if a 32-bit task did adjtimex (i.e. not if a 64-bit task did adjtimex), it was also duplicating computations from kernel/timer.c and it wasn't clear that it was (still) correct. The simple solution is to ask the timekeeping code how long the current jiffy will be on each timer interrupt, after calling do_timer. If this jiffy will be a different length from the last one, we then need to compute new values for the factor and offset used in the lockless do_gettimeofday. In this way we can keep xtime and do_gettimeofday in sync, even when NTP is varying the tick length. Note that when adjtimex varies the tick length, it almost always introduces the variation from the next tick on. The only case I could see where adjtimex would vary the length of the current tick is when an old-style adjtime adjustment is being cancelled. (It's not clear to me why the adjustment has to be cancelled immediately rather than from the next tick on.) Thus I don't see any real need for a hook in adjtimex; the rare case of an old-style adjustment being cancelled can be fixed up at the next tick. Signed-off-by: Paul Mackerras Acked-by: john stultz Signed-off-by: Linus Torvalds --- include/linux/timex.h | 3 +++ kernel/timer.c | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 04a4a8cb4ed3..b7ca1204e42a 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -345,6 +345,9 @@ time_interpolator_reset(void) #endif /* !CONFIG_TIME_INTERPOLATION */ +/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ +extern u64 current_tick_length(void); + #endif /* KERNEL */ #endif /* LINUX_TIMEX_H */ diff --git a/kernel/timer.c b/kernel/timer.c index b9dad3994676..fe3a9a9f8328 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -717,12 +717,16 @@ static void second_overflow(void) #endif } -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) +/* + * Returns how many microseconds we need to add to xtime this tick + * in doing an adjustment requested with adjtime. + */ +static long adjtime_adjustment(void) { - long time_adjust_step, delta_nsec; + long time_adjust_step; - if ((time_adjust_step = time_adjust) != 0 ) { + time_adjust_step = time_adjust; + if (time_adjust_step) { /* * We are doing an adjtime thing. Prepare time_adjust_step to * be within bounds. Note that a positive time_adjust means we @@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void) */ time_adjust_step = min(time_adjust_step, (long)tickadj); time_adjust_step = max(time_adjust_step, (long)-tickadj); + } + return time_adjust_step; +} +/* in the NTP reference this is called "hardclock()" */ +static void update_wall_time_one_tick(void) +{ + long time_adjust_step, delta_nsec; + + time_adjust_step = adjtime_adjustment(); + if (time_adjust_step) /* Reduce by this step the amount of time left */ time_adjust -= time_adjust_step; - } delta_nsec = tick_nsec + time_adjust_step * 1000; /* * Advance the phase, once it gets to one microsecond, then @@ -758,6 +771,22 @@ static void update_wall_time_one_tick(void) } } +/* + * Return how long ticks are at the moment, that is, how much time + * update_wall_time_one_tick will add to xtime next time we call it + * (assuming no calls to do_adjtimex in the meantime). + * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 + * bits to the right of the binary point. + * This function has no side-effects. + */ +u64 current_tick_length(void) +{ + long delta_nsec; + + delta_nsec = tick_nsec + adjtime_adjustment() * 1000; + return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; +} + /* * Using a loop looks inefficient, but "ticks" is * usually just one (we shouldn't be losing ticks, -- cgit v1.3-6-gb490 From cfe91f9ce297e23e6fbdf61c02bdd8ab9af7c8a8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Fri, 17 Feb 2006 03:16:55 -0500 Subject: [PATCH] i386: fix singlestepping though a syscall Do not mask TIF_SINGLESTEP bit in _TIF_WORK_MASK. Masking this stopped do_notify_resume() from being called when it should have been. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Linus Torvalds --- include/asm-i386/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index e20e99551d71..1f7d48c9ba3f 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -158,8 +158,8 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|\ - _TIF_SECCOMP|_TIF_SYSCALL_EMU)) + (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SYSCALL_EMU)) /* work to do on any return to u-space */ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) -- cgit v1.3-6-gb490