From 67c457a8c378a006a34d92f9bd3078a80a92f250 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Apr 2009 07:50:56 -0400 Subject: jbd2: use SWRITE_SYNC_PLUG when writing synchronous revoke records The revoke records must be written using the same way as the rest of the blocks during the commit process; that is, either marked as synchronous writes or as asynchornous writes. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8815a3456b3b..cc02393bfce8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1193,7 +1193,8 @@ extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); -extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); +extern void jbd2_journal_write_revoke_records(journal_t *, + transaction_t *, int); /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); -- cgit v1.3-8-gc7d7 From 38d726d153cfe5efe5fe22d28d36ab382dda3a5c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Apr 2009 10:10:47 -0400 Subject: jbd: use SWRITE_SYNC_PLUG when writing synchronous revoke records The revoke records must be written using the same way as the rest of the blocks during the commit process; that is, either marked as synchronous writes or as asynchornous writes. Signed-off-by: "Theodore Ts'o" --- fs/jbd/commit.c | 2 +- fs/jbd/revoke.c | 20 +++++++++++--------- include/linux/jbd.h | 3 ++- 3 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a8e8513a78a9..06560c520f49 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal) err = 0; } - journal_write_revoke_records(journal, commit_transaction); + journal_write_revoke_records(journal, commit_transaction, write_op); /* * If we found any dirty or locked buffers, then we should have diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c7bd649bbbdc..1b1a06e1c836 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -67,6 +67,7 @@ #include #include #include +#include #endif #include @@ -99,8 +100,8 @@ struct jbd_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, struct journal_head **, int *, - struct jbd_revoke_record_s *); -static void flush_descriptor(journal_t *, struct journal_head *, int); + struct jbd_revoke_record_s *, int); +static void flush_descriptor(journal_t *, struct journal_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -486,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal) */ void journal_write_revoke_records(journal_t *journal, - transaction_t *transaction) + transaction_t *transaction, int write_op) { struct journal_head *descriptor; struct jbd_revoke_record_s *record; @@ -510,14 +511,14 @@ void journal_write_revoke_records(journal_t *journal, hash_list->next; write_one_revoke_record(journal, transaction, &descriptor, &offset, - record); + record, write_op); count++; list_del(&record->hash); kmem_cache_free(revoke_record_cache, record); } } if (descriptor) - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); jbd_debug(1, "Wrote %d revoke records\n", count); } @@ -530,7 +531,8 @@ static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, struct journal_head **descriptorp, int *offsetp, - struct jbd_revoke_record_s *record) + struct jbd_revoke_record_s *record, + int write_op) { struct journal_head *descriptor; int offset; @@ -549,7 +551,7 @@ static void write_one_revoke_record(journal_t *journal, /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } } @@ -586,7 +588,7 @@ static void write_one_revoke_record(journal_t *journal, static void flush_descriptor(journal_t *journal, struct journal_head *descriptor, - int offset) + int offset, int write_op) { journal_revoke_header_t *header; struct buffer_head *bh = jh2bh(descriptor); @@ -601,7 +603,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block(SWRITE, 1, &bh); + ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); } #endif diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 53ae4399da2d..c2049a04fa0b 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -978,7 +978,8 @@ extern void journal_destroy_revoke(journal_t *); extern int journal_revoke (handle_t *, unsigned long, struct buffer_head *); extern int journal_cancel_revoke(handle_t *, struct journal_head *); -extern void journal_write_revoke_records(journal_t *, transaction_t *); +extern void journal_write_revoke_records(journal_t *, + transaction_t *, int); /* Recovery revoke support */ extern int journal_set_revoke(journal_t *, unsigned long, tid_t); -- cgit v1.3-8-gc7d7 From 412401029259b1ad67559cec93bcc7ee4a9551aa Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 16 Apr 2009 09:58:44 -0600 Subject: powerpc/5200: Bring the legacy fsl_spi_platform_data hooks back In commit 364fdbc00fbdd409ade63500710123fe323aa164 ("spi_mpc83xx: rework chip selects handling"), I merged activate_cs and deactivate_cs hooks into cs_control, but I overlooked that mpc52xx_psc_spi driver is using these hooks too. And that resulted in the following build failure: CC drivers/spi/mpc52xx_psc_spi.o drivers/spi/mpc52xx_psc_spi.c: In function 'mpc52xx_psc_spi_do_probe': drivers/spi/mpc52xx_psc_spi.c:398: error: 'struct fsl_spi_platform_data' has no member named 'activate_cs' drivers/spi/mpc52xx_psc_spi.c:399: error: 'struct fsl_spi_platform_data' has no member named 'deactivate_cs' make[2]: *** [drivers/spi/mpc52xx_psc_spi.o] Error 1 This patch simply adds the legacy hooks back for 2.6.30, and for 2.6.31 we'll convert the driver to ->cs_control. Reported-by: Subrata Modak Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/fsl_devices.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index f2a78b5e8b55..0cde1806cfab 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -83,6 +83,10 @@ struct fsl_spi_platform_data { u16 max_chipselect; void (*cs_control)(struct spi_device *spi, bool on); u32 sysclk; + + /* Legacy hooks, used by mpc52xx_psc_spi driver. */ + void (*activate_cs)(u8 cs, u8 polarity); + void (*deactivate_cs)(u8 cs, u8 polarity); }; struct mpc8xx_pcmcia_ops { -- cgit v1.3-8-gc7d7 From 0112fc2229847feb6c4eb011e6833d8f1742a375 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Wed, 8 Apr 2009 20:05:42 +0400 Subject: Separate out common fstatat code into vfs_fstatat This is a version incorporating Christoph's suggestion. Separate out common *fstatat functionality into a single function instead of duplicating it all over the code. Signed-off-by: Oleg Drokin Signed-off-by: Al Viro --- arch/arm/kernel/sys_oabi-compat.c | 19 ++++--------- arch/s390/kernel/compat_linux.c | 18 ++++--------- arch/sparc/kernel/sys_sparc32.c | 19 ++++--------- arch/x86/ia32/sys_ia32.c | 19 ++++--------- fs/compat.c | 19 ++++--------- fs/stat.c | 56 +++++++++++++++++++-------------------- include/linux/fs.h | 1 + 7 files changed, 54 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index e04173c7e621..d59a0cd537f0 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -177,21 +177,12 @@ asmlinkage long sys_oabi_fstatat64(int dfd, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_oldabi_stat64(&stat, statbuf); - -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_oldabi_stat64(&stat, statbuf); } struct oabi_flock64 { diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 6cc87d8c8682..002c70d3cb75 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -702,20 +702,12 @@ asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename, struct stat64_emu31 __user* statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); + int error; - if (!error) - error = cp_stat64(statbuf, &stat); -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index e800503879e4..f5000a460c05 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -206,21 +206,12 @@ asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename, struct compat_stat64 __user * statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat64(&stat, statbuf); } asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index efac92fd1efb..085a8c35f149 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -129,21 +129,12 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, struct stat64 __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_stat64(statbuf, &stat); - -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f15889..dda72e267092 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -204,21 +204,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, struct compat_stat __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #endif diff --git a/fs/stat.c b/fs/stat.c index 2db740a0cfb5..54711662b855 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -109,6 +109,24 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) EXPORT_SYMBOL(vfs_fstat); +int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) +{ + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, stat); + else + error = vfs_stat_fd(dfd, filename, stat); +out: + return error; +} + +EXPORT_SYMBOL(vfs_fstatat); + + #ifdef __ARCH_WANT_OLD_STAT /* @@ -264,21 +282,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat(&stat, statbuf); } #endif @@ -404,21 +413,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 */ diff --git a/include/linux/fs.h b/include/linux/fs.h index e766be0d4329..257f4d37ad23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2302,6 +2302,7 @@ extern int vfs_lstat(char __user *, struct kstat *); extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); +extern int vfs_fstatat(int , char __user *, struct kstat *, int); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); -- cgit v1.3-8-gc7d7 From 2eae7a1874ca5be3232765d89e0250a449f1bc90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Apr 2009 16:34:03 -0400 Subject: kill vfs_stat_fd / vfs_lstat_fd There's really no reason to keep vfs_stat_fd and vfs_lstat_fd with Oleg's vfs_fstatat. Use vfs_fstatat for the few cases having the directory fd, and switch all others to vfs_stat / vfs_lstat. Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/compat.c | 18 +++++---- fs/stat.c | 105 +++++++++++++++++++++-------------------------------- include/linux/fs.h | 2 - 3 files changed, 52 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index dda72e267092..379a399bf5c3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_stat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } asmlinkage long compat_sys_newlstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_lstat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 diff --git a/fs/stat.c b/fs/stat.c index 54711662b855..075694e31d8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -55,46 +55,6 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) EXPORT_SYMBOL(vfs_getattr); -int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) -{ - struct path path; - int error; - - error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); - if (!error) { - error = vfs_getattr(path.mnt, path.dentry, stat); - path_put(&path); - } - return error; -} - -int vfs_stat(char __user *name, struct kstat *stat) -{ - return vfs_stat_fd(AT_FDCWD, name, stat); -} - -EXPORT_SYMBOL(vfs_stat); - -int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) -{ - struct path path; - int error; - - error = user_path_at(dfd, name, 0, &path); - if (!error) { - error = vfs_getattr(path.mnt, path.dentry, stat); - path_put(&path); - } - return error; -} - -int vfs_lstat(char __user *name, struct kstat *stat) -{ - return vfs_lstat_fd(AT_FDCWD, name, stat); -} - -EXPORT_SYMBOL(vfs_lstat); - int vfs_fstat(unsigned int fd, struct kstat *stat) { struct file *f = fget(fd); @@ -106,26 +66,43 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) } return error; } - EXPORT_SYMBOL(vfs_fstat); int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) { + struct path path; int error = -EINVAL; + int lookup_flags = 0; if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) goto out; - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, stat); - else - error = vfs_stat_fd(dfd, filename, stat); + if (!(flag & AT_SYMLINK_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + goto out; + + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); out: return error; } - EXPORT_SYMBOL(vfs_fstatat); +int vfs_stat(char __user *name, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, name, stat, 0); +} +EXPORT_SYMBOL(vfs_stat); + +int vfs_lstat(char __user *name, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); +} +EXPORT_SYMBOL(vfs_lstat); + #ifdef __ARCH_WANT_OLD_STAT @@ -173,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_old_stat(&stat, statbuf); + error = vfs_stat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_old_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) @@ -258,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error = vfs_stat(filename, &stat); - return error; + if (error) + return error; + return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_new_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) diff --git a/include/linux/fs.h b/include/linux/fs.h index 257f4d37ad23..8f42b35a7565 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2299,8 +2299,6 @@ extern int vfs_readdir(struct file *, filldir_t, void *); extern int vfs_stat(char __user *, struct kstat *); extern int vfs_lstat(char __user *, struct kstat *); -extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); -extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_fstatat(int , char __user *, struct kstat *, int); -- cgit v1.3-8-gc7d7 From 38e23c95f92a84fb8505a9f572b8a209c9c372c1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 9 Apr 2009 20:17:52 +0900 Subject: fs: Mark get_filesystem_list() as __init function. "int get_filesystem_list(char * buf)" is called by only "static void __init get_fs_names(char *page)". We can mark get_filesystem_list() as "__init". Signed-off-by: Tetsuo Handa Signed-off-by: Al Viro --- fs/filesystems.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/filesystems.c b/fs/filesystems.c index 1aa70260e6d1..a24c58e181db 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) return retval; } -int get_filesystem_list(char * buf) +int __init get_filesystem_list(char *buf) { int len = 0; struct file_system_type * tmp; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f42b35a7565..5bed436f4353 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2448,7 +2448,7 @@ struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -int get_filesystem_list(char * buf); +int __init get_filesystem_list(char *buf); #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ -- cgit v1.3-8-gc7d7 From be9208dff23af904655807672dd8235abf6ac039 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Apr 2009 23:29:41 -0400 Subject: reiserfs: fix j_last_flush_trans_id type Conversion in commit 600ed41675d8c384519d8f0b3c76afed39ef2f4b had missed that one, but converted format from %lu to %u. As the result, /proc/..../journal got buggered on 64bit boxen. Signed-off-by: Al Viro --- include/linux/reiserfs_fs_sb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 5621d87c4479..6b361d23a499 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -193,7 +193,7 @@ struct reiserfs_journal { atomic_t j_wcount; /* count of writers for current commit */ unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ - unsigned long j_last_flush_trans_id; /* last fully flushed journal timestamp */ + unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ struct buffer_head *j_header_bh; time_t j_trans_start_time; /* time this transaction started */ -- cgit v1.3-8-gc7d7 From 88bea188b85f9cefefbbd56b8a48d0f798409177 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Apr 2009 00:35:47 -0400 Subject: ACPI: add /sys/firmware/acpi/interrupts/sci_not counter This counter may prove useful in debugging some spurious interrupt issues seen in the field. Signed-off-by: Len Brown --- Documentation/ABI/testing/sysfs-firmware-acpi | 8 ++++++-- drivers/acpi/osl.c | 4 +++- drivers/acpi/system.c | 11 +++++++++-- include/linux/acpi.h | 1 + 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index e8ffc70ffe12..4f9ba3c2fca7 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -69,9 +69,13 @@ Description: gpe1F: 0 invalid gpe_all: 1192 sci: 1194 + sci_not: 0 - sci - The total number of times the ACPI SCI - has claimed an interrupt. + sci - The number of times the ACPI SCI + has been called and claimed an interrupt. + + sci_not - The number of times the ACPI SCI + has been called and NOT claimed an interrupt. gpe_all - count of SCI caused by GPEs. diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index d59f08ecaf16..d916bea729f1 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -353,8 +353,10 @@ static irqreturn_t acpi_irq(int irq, void *dev_id) if (handled) { acpi_irq_handled++; return IRQ_HANDLED; - } else + } else { + acpi_irq_not_handled++; return IRQ_NONE; + } } acpi_status diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index da51f05ef8d8..0944daec064f 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -38,6 +38,7 @@ ACPI_MODULE_NAME("system"); #define ACPI_SYSTEM_DEVICE_NAME "System" u32 acpi_irq_handled; +u32 acpi_irq_not_handled; /* * Make ACPICA version work as module param @@ -214,8 +215,9 @@ err: #define COUNT_GPE 0 #define COUNT_SCI 1 /* acpi_irq_handled */ -#define COUNT_ERROR 2 /* other */ -#define NUM_COUNTERS_EXTRA 3 +#define COUNT_SCI_NOT 2 /* acpi_irq_not_handled */ +#define COUNT_ERROR 3 /* other */ +#define NUM_COUNTERS_EXTRA 4 struct event_counter { u32 count; @@ -317,6 +319,8 @@ static ssize_t counter_show(struct kobject *kobj, all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI].count = acpi_irq_handled; + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI_NOT].count = + acpi_irq_not_handled; all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE].count = acpi_gpe_count; @@ -363,6 +367,7 @@ static ssize_t counter_set(struct kobject *kobj, all_counters[i].count = 0; acpi_gpe_count = 0; acpi_irq_handled = 0; + acpi_irq_not_handled = 0; goto end; } @@ -456,6 +461,8 @@ void acpi_irq_stats_init(void) sprintf(buffer, "gpe_all"); else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI) sprintf(buffer, "sci"); + else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI_NOT) + sprintf(buffer, "sci_not"); else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR) sprintf(buffer, "error"); else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6586cbd0d4af..88be890ee3c7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -111,6 +111,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; +extern u32 acpi_irq_not_handled; extern struct acpi_mcfg_allocation *pci_mmcfg_config; extern int pci_mmcfg_config_num; -- cgit v1.3-8-gc7d7 From 8b9cf76d0fa6cd98fe42dd2f86460d6ede55fed8 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 21 Apr 2009 13:44:13 +0200 Subject: Fix SYSCALL_ALIAS for older MIPS assembler Older MIPS assembler don't support .set for defining aliases. Using = works for old and new assembers. Signed-off-by: Thomas Bogendoerfer Acked-by: Ralf Baechle Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dabe4ad89141..40617c1d8976 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -148,7 +148,7 @@ struct old_linux_dirent; asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) #else -#ifdef CONFIG_ALPHA +#if defined(CONFIG_ALPHA) || defined(CONFIG_MIPS) #define SYSCALL_ALIAS(alias, name) \ asm ( #alias " = " #name "\n\t.globl " #alias) #else -- cgit v1.3-8-gc7d7 From 8e19608e8b5c001e4a66ce482edc474f05fb7355 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 21 Apr 2009 12:24:00 -0700 Subject: clocksource: pass clocksource to read() callback Pass clocksource pointer to the read() callback for clocksources. This allows us to share the callback between multiple instances. [hugh@veritas.com: fix powerpc build of clocksource pass clocksource mods] [akpm@linux-foundation.org: cleanup] Signed-off-by: Magnus Damm Acked-by: John Stultz Cc: Thomas Gleixner Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-at91/at91rm9200_time.c | 2 +- arch/arm/mach-at91/at91sam926x_time.c | 2 +- arch/arm/mach-davinci/time.c | 2 +- arch/arm/mach-imx/time.c | 2 +- arch/arm/mach-ixp4xx/common.c | 2 +- arch/arm/mach-msm/timer.c | 4 ++-- arch/arm/mach-netx/time.c | 2 +- arch/arm/mach-ns9xxx/time-ns9360.c | 2 +- arch/arm/mach-omap1/time.c | 2 +- arch/arm/mach-omap2/timer-gp.c | 2 +- arch/arm/mach-pxa/time.c | 2 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-versatile/core.c | 2 +- arch/arm/plat-mxc/time.c | 2 +- arch/arm/plat-omap/common.c | 4 ++-- arch/arm/plat-orion/time.c | 2 +- arch/avr32/kernel/time.c | 2 +- arch/blackfin/kernel/time-ts.c | 12 ++++++------ arch/ia64/kernel/cyclone.c | 2 +- arch/ia64/kernel/time.c | 4 ++-- arch/ia64/sn/kernel/sn2/timer.c | 2 +- arch/m68knommu/platform/68328/timers.c | 2 +- arch/m68knommu/platform/coldfire/dma_timer.c | 2 +- arch/m68knommu/platform/coldfire/pit.c | 2 +- arch/m68knommu/platform/coldfire/timers.c | 2 +- arch/mips/kernel/cevt-txx9.c | 2 +- arch/mips/kernel/csrc-bcm1480.c | 2 +- arch/mips/kernel/csrc-ioasic.c | 6 +++--- arch/mips/kernel/csrc-r4k.c | 2 +- arch/mips/kernel/csrc-sb1250.c | 2 +- arch/mips/kernel/i8253.c | 2 +- arch/mips/nxp/pnx8550/common/time.c | 2 +- arch/mips/sgi-ip27/ip27-timer.c | 2 +- arch/powerpc/kernel/time.c | 8 ++++---- arch/s390/kernel/time.c | 2 +- arch/sh/kernel/time_32.c | 2 +- arch/sh/kernel/timers/timer-tmu.c | 2 +- arch/sparc/kernel/time_64.c | 7 ++++++- arch/um/kernel/time.c | 2 +- arch/x86/kernel/hpet.c | 6 +++--- arch/x86/kernel/i8253.c | 2 +- arch/x86/kernel/kvmclock.c | 7 ++++++- arch/x86/kernel/tsc.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/lguest/boot.c | 2 +- arch/x86/xen/time.c | 7 ++++++- drivers/char/hpet.c | 2 +- drivers/clocksource/acpi_pm.c | 12 ++++++------ drivers/clocksource/cyclone.c | 2 +- drivers/clocksource/scx200_hrt.c | 2 +- drivers/clocksource/tcb_clksrc.c | 2 +- include/linux/clocksource.h | 6 +++--- kernel/time/clocksource.c | 8 ++++---- kernel/time/jiffies.c | 2 +- 54 files changed, 94 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 1ff1bda0a894..309f3511aa20 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -85,7 +85,7 @@ static struct irqaction at91rm9200_timer_irq = { .handler = at91rm9200_timer_interrupt }; -static cycle_t read_clk32k(void) +static cycle_t read_clk32k(struct clocksource *cs) { return read_CRTR(); } diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index b63e1d5f1bad..4bd56aee4370 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -31,7 +31,7 @@ static u32 pit_cnt; /* access only w/system irq blocked */ * Clocksource: just a monotonic counter of MCK/16 cycles. * We don't care whether or not PIT irqs are enabled. */ -static cycle_t read_pit_clk(void) +static cycle_t read_pit_clk(struct clocksource *cs) { unsigned long flags; u32 elapsed; diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index f8bcd29d17a6..6c227d4ba998 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -238,7 +238,7 @@ static void __init timer_init(void) /* * clocksource */ -static cycle_t read_cycles(void) +static cycle_t read_cycles(struct clocksource *cs) { struct timer_s *t = &timers[TID_CLOCKSOURCE]; diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index aff0ebcfa847..5aef18b599e5 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -73,7 +73,7 @@ static void __init imx_timer_hardware_init(void) IMX_TCTL(TIMER_BASE) = TCTL_FRR | TCTL_CLK_PCLK1 | TCTL_TEN; } -cycle_t imx_get_cycles(void) +cycle_t imx_get_cycles(struct clocksource *cs) { return IMX_TCN(TIMER_BASE); } diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index f4656d2ac8a8..1e93dfee7543 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -401,7 +401,7 @@ void __init ixp4xx_sys_init(void) /* * clocksource */ -cycle_t ixp4xx_get_cycles(void) +cycle_t ixp4xx_get_cycles(struct clocksource *cs) { return *IXP4XX_OSTS; } diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 444d9c0f5ca6..4855b8ca5101 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -57,12 +57,12 @@ static irqreturn_t msm_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static cycle_t msm_gpt_read(void) +static cycle_t msm_gpt_read(struct clocksource *cs) { return readl(MSM_GPT_BASE + TIMER_COUNT_VAL); } -static cycle_t msm_dgt_read(void) +static cycle_t msm_dgt_read(struct clocksource *cs) { return readl(MSM_DGT_BASE + TIMER_COUNT_VAL) >> MSM_DGT_SHIFT; } diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c index f201fddb594f..82801dbf0579 100644 --- a/arch/arm/mach-netx/time.c +++ b/arch/arm/mach-netx/time.c @@ -104,7 +104,7 @@ static struct irqaction netx_timer_irq = { .handler = netx_timer_interrupt, }; -cycle_t netx_get_cycles(void) +cycle_t netx_get_cycles(struct clocksource *cs) { return readl(NETX_GPIO_COUNTER_CURRENT(TIMER_CLOCKSOURCE)); } diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c index 41df69721769..77281260358a 100644 --- a/arch/arm/mach-ns9xxx/time-ns9360.c +++ b/arch/arm/mach-ns9xxx/time-ns9360.c @@ -25,7 +25,7 @@ #define TIMER_CLOCKEVENT 1 static u32 latch; -static cycle_t ns9360_clocksource_read(void) +static cycle_t ns9360_clocksource_read(struct clocksource *cs) { return __raw_readl(SYS_TR(TIMER_CLOCKSOURCE)); } diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 495a32c287b4..4d56408d3cff 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -198,7 +198,7 @@ static struct irqaction omap_mpu_timer2_irq = { .handler = omap_mpu_timer2_interrupt, }; -static cycle_t mpu_read(void) +static cycle_t mpu_read(struct clocksource *cs) { return ~omap_mpu_timer_read(1); } diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index 9fc13a2cc3f4..1cb2c0909c2b 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -138,7 +138,7 @@ static inline void __init omap2_gp_clocksource_init(void) {} * clocksource */ static struct omap_dm_timer *gpt_clocksource; -static cycle_t clocksource_read_cycles(void) +static cycle_t clocksource_read_cycles(struct clocksource *cs) { return (cycle_t)omap_dm_timer_read_counter(gpt_clocksource); } diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 8eb3830fbb0b..750c448db672 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -125,7 +125,7 @@ static struct clock_event_device ckevt_pxa_osmr0 = { .set_mode = pxa_osmr0_set_mode, }; -static cycle_t pxa_read_oscr(void) +static cycle_t pxa_read_oscr(struct clocksource *cs) { return OSCR; } diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 9ab947c14f26..942e1a7eb9b2 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -715,7 +715,7 @@ static struct irqaction realview_timer_irq = { .handler = realview_timer_interrupt, }; -static cycle_t realview_get_cycles(void) +static cycle_t realview_get_cycles(struct clocksource *cs) { return ~readl(timer3_va_base + TIMER_VALUE); } diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 565776680d8c..1f929c391af7 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -948,7 +948,7 @@ static struct irqaction versatile_timer_irq = { .handler = versatile_timer_interrupt, }; -static cycle_t versatile_get_cycles(void) +static cycle_t versatile_get_cycles(struct clocksource *cs) { return ~readl(TIMER3_VA_BASE + TIMER_VALUE); } diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index ef1b3cd85bd3..dab3357196fb 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -36,7 +36,7 @@ static enum clock_event_mode clockevent_mode = CLOCK_EVT_MODE_UNUSED; /* clock source */ -static cycle_t mxc_get_cycles(void) +static cycle_t mxc_get_cycles(struct clocksource *cs) { return __raw_readl(TIMER_BASE + MXC_TCN); } diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index d1797147732f..433021f3d7cc 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -185,7 +185,7 @@ console_initcall(omap_add_serial_console); #include -static cycle_t omap_32k_read(void) +static cycle_t omap_32k_read(struct clocksource *cs) { return omap_readl(TIMER_32K_SYNCHRONIZED); } @@ -207,7 +207,7 @@ unsigned long long sched_clock(void) { unsigned long long ret; - ret = (unsigned long long)omap_32k_read(); + ret = (unsigned long long)omap_32k_read(&clocksource_32k); ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; return ret; } diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 6fa2923e6dca..2faf9dba4ef7 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -41,7 +41,7 @@ static u32 ticks_per_jiffy; /* * Clocksource handling. */ -static cycle_t orion_clksrc_read(void) +static cycle_t orion_clksrc_read(struct clocksource *cs) { return 0xffffffff - readl(TIMER0_VAL); } diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 0ff46bf873b0..f27aa3b259fa 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -18,7 +18,7 @@ #include -static cycle_t read_cycle_count(void) +static cycle_t read_cycle_count(struct clocksource *cs) { return (cycle_t)sysreg_read(COUNT); } diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index 0ed2badfd746..27646121280a 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -58,16 +58,11 @@ static inline unsigned long long cycles_2_ns(cycle_t cyc) return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; } -static cycle_t read_cycles(void) +static cycle_t read_cycles(struct clocksource *cs) { return __bfin_cycles_off + (get_cycles() << __bfin_cycles_mod); } -unsigned long long sched_clock(void) -{ - return cycles_2_ns(read_cycles()); -} - static struct clocksource clocksource_bfin = { .name = "bfin_cycles", .rating = 350, @@ -77,6 +72,11 @@ static struct clocksource clocksource_bfin = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +unsigned long long sched_clock(void) +{ + return cycles_2_ns(read_cycles(&clocksource_bfin)); +} + static int __init bfin_clocksource_init(void) { set_cyc2ns_scale(get_cclk() / 1000); diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index 790ef0d87e12..71e35864d2e2 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -21,7 +21,7 @@ void __init cyclone_setup(void) static void __iomem *cyclone_mc; -static cycle_t read_cyclone(void) +static cycle_t read_cyclone(struct clocksource *cs) { return (cycle_t)readq((void __iomem *)cyclone_mc); } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 641c8b61c4f1..604c1a35db33 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -33,7 +33,7 @@ #include "fsyscall_gtod_data.h" -static cycle_t itc_get_cycles(void); +static cycle_t itc_get_cycles(struct clocksource *cs); struct fsyscall_gtod_data_t fsyscall_gtod_data = { .lock = SEQLOCK_UNLOCKED, @@ -383,7 +383,7 @@ ia64_init_itm (void) } } -static cycle_t itc_get_cycles(void) +static cycle_t itc_get_cycles(struct clocksource *cs) { u64 lcycle, now, ret; diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index cf67fc562054..21d6f09e3447 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -23,7 +23,7 @@ extern unsigned long sn_rtc_cycles_per_second; -static cycle_t read_sn2(void) +static cycle_t read_sn2(struct clocksource *cs) { return (cycle_t)readq(RTC_COUNTER_ADDR); } diff --git a/arch/m68knommu/platform/68328/timers.c b/arch/m68knommu/platform/68328/timers.c index 6bafefa546e5..309f725995bf 100644 --- a/arch/m68knommu/platform/68328/timers.c +++ b/arch/m68knommu/platform/68328/timers.c @@ -75,7 +75,7 @@ static struct irqaction m68328_timer_irq = { /***************************************************************************/ -static cycle_t m68328_read_clk(void) +static cycle_t m68328_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/m68knommu/platform/coldfire/dma_timer.c b/arch/m68knommu/platform/coldfire/dma_timer.c index 772578b1084f..a5f562823d7a 100644 --- a/arch/m68knommu/platform/coldfire/dma_timer.c +++ b/arch/m68knommu/platform/coldfire/dma_timer.c @@ -34,7 +34,7 @@ #define DMA_DTMR_CLK_DIV_16 (2 << 1) #define DMA_DTMR_ENABLE (1 << 0) -static cycle_t cf_dt_get_cycles(void) +static cycle_t cf_dt_get_cycles(struct clocksource *cs) { return __raw_readl(DTCN0); } diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c index 2a12e7fa9748..61b96211f8ff 100644 --- a/arch/m68knommu/platform/coldfire/pit.c +++ b/arch/m68knommu/platform/coldfire/pit.c @@ -125,7 +125,7 @@ static struct irqaction pit_irq = { /***************************************************************************/ -static cycle_t pit_read_clk(void) +static cycle_t pit_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/m68knommu/platform/coldfire/timers.c b/arch/m68knommu/platform/coldfire/timers.c index 454f25493491..1ba8a3731653 100644 --- a/arch/m68knommu/platform/coldfire/timers.c +++ b/arch/m68knommu/platform/coldfire/timers.c @@ -78,7 +78,7 @@ static struct irqaction mcftmr_timer_irq = { /***************************************************************************/ -static cycle_t mcftmr_read_clk(void) +static cycle_t mcftmr_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c index eccf7d6096bd..2e911e3da8d3 100644 --- a/arch/mips/kernel/cevt-txx9.c +++ b/arch/mips/kernel/cevt-txx9.c @@ -22,7 +22,7 @@ static struct txx9_tmr_reg __iomem *txx9_cs_tmrptr; -static cycle_t txx9_cs_read(void) +static cycle_t txx9_cs_read(struct clocksource *cs) { return __raw_readl(&txx9_cs_tmrptr->trr); } diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c index 868745e7184b..51489f8a825e 100644 --- a/arch/mips/kernel/csrc-bcm1480.c +++ b/arch/mips/kernel/csrc-bcm1480.c @@ -28,7 +28,7 @@ #include -static cycle_t bcm1480_hpt_read(void) +static cycle_t bcm1480_hpt_read(struct clocksource *cs) { return (cycle_t) __raw_readq(IOADDR(A_SCD_ZBBUS_CYCLE_COUNT)); } diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c index 1d5f63cf8997..b551f48d3a07 100644 --- a/arch/mips/kernel/csrc-ioasic.c +++ b/arch/mips/kernel/csrc-ioasic.c @@ -25,7 +25,7 @@ #include #include -static cycle_t dec_ioasic_hpt_read(void) +static cycle_t dec_ioasic_hpt_read(struct clocksource *cs) { return ioasic_read(IO_REG_FCTR); } @@ -47,13 +47,13 @@ void __init dec_ioasic_clocksource_init(void) while (!ds1287_timer_state()) ; - start = dec_ioasic_hpt_read(); + start = dec_ioasic_hpt_read(&clocksource_dec); while (i--) while (!ds1287_timer_state()) ; - end = dec_ioasic_hpt_read(); + end = dec_ioasic_hpt_read(&clocksource_dec); freq = (end - start) * 10; printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq); diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index f1a2893931ed..e95a3cd48eea 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -10,7 +10,7 @@ #include -static cycle_t c0_hpt_read(void) +static cycle_t c0_hpt_read(struct clocksource *cs) { return read_c0_count(); } diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c index 92212bbb8e45..d14d3d1907fa 100644 --- a/arch/mips/kernel/csrc-sb1250.c +++ b/arch/mips/kernel/csrc-sb1250.c @@ -33,7 +33,7 @@ * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over * again. */ -static cycle_t sb1250_hpt_read(void) +static cycle_t sb1250_hpt_read(struct clocksource *cs) { unsigned int count; diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index 689719e34f08..ed20e7fe65e3 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -128,7 +128,7 @@ void __init setup_pit_timer(void) * to just read by itself. So use jiffies to emulate a free * running counter: */ -static cycle_t pit_read(void) +static cycle_t pit_read(struct clocksource *cs) { unsigned long flags; int count; diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c index cf293b279098..8df43e9e4d90 100644 --- a/arch/mips/nxp/pnx8550/common/time.c +++ b/arch/mips/nxp/pnx8550/common/time.c @@ -35,7 +35,7 @@ static unsigned long cpj; -static cycle_t hpt_read(void) +static cycle_t hpt_read(struct clocksource *cs) { return read_c0_count2(); } diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index f024057a35f8..f10a7cd64f7e 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -159,7 +159,7 @@ static void __init hub_rt_clock_event_global_init(void) setup_irq(irq, &hub_rt_irqaction); } -static cycle_t hub_rt_read(void) +static cycle_t hub_rt_read(struct clocksource *cs) { return REMOTE_HUB_L(cputonasid(0), PI_RT_COUNT); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 926ea864e34f..48571ac56fb7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -77,7 +77,7 @@ #include #include -static cycle_t rtc_read(void); +static cycle_t rtc_read(struct clocksource *); static struct clocksource clocksource_rtc = { .name = "rtc", .rating = 400, @@ -88,7 +88,7 @@ static struct clocksource clocksource_rtc = { .read = rtc_read, }; -static cycle_t timebase_read(void); +static cycle_t timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { .name = "timebase", .rating = 400, @@ -766,12 +766,12 @@ unsigned long read_persistent_clock(void) } /* clocksource code */ -static cycle_t rtc_read(void) +static cycle_t rtc_read(struct clocksource *cs) { return (cycle_t)get_rtc(); } -static cycle_t timebase_read(void) +static cycle_t timebase_read(struct clocksource *cs) { return (cycle_t)get_tb(); } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6ded50dfa75a..ef596d020573 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -201,7 +201,7 @@ unsigned long read_persistent_clock(void) return ts.tv_sec; } -static cycle_t read_tod_clock(void) +static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); } diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c index c34e1e0f9b02..1700d2465f6c 100644 --- a/arch/sh/kernel/time_32.c +++ b/arch/sh/kernel/time_32.c @@ -208,7 +208,7 @@ unsigned long long sched_clock(void) if (!clocksource_sh.rating) return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); - cycles = clocksource_sh.read(); + cycles = clocksource_sh.read(&clocksource_sh); return cyc2ns(&clocksource_sh, cycles); } #endif diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index c5d3396f5960..fe8d8930ccb6 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -81,7 +81,7 @@ static int tmu_timer_stop(void) */ static int tmus_are_scaled; -static cycle_t tmu_timer_read(void) +static cycle_t tmu_timer_read(struct clocksource *cs) { return ((cycle_t)(~_tmu_read(TMU1)))<get_tick(); +} + void __init time_init(void) { unsigned long freq = sparc64_init_timers(); @@ -827,7 +832,7 @@ void __init time_init(void) clocksource_tick.mult = clocksource_hz2mult(freq, clocksource_tick.shift); - clocksource_tick.read = tick_ops->get_tick; + clocksource_tick.read = clocksource_tick_read; printk("clocksource: mult[%x] shift[%d]\n", clocksource_tick.mult, clocksource_tick.shift); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index b13a87a3ec95..c8b9c469fcd7 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -65,7 +65,7 @@ static irqreturn_t um_timer(int irq, void *dev) return IRQ_HANDLED; } -static cycle_t itimer_read(void) +static cycle_t itimer_read(struct clocksource *cs) { return os_nsecs() / 1000; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 648b3a2a3a44..3f0019e0a229 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -722,7 +722,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, /* * Clock source related code */ -static cycle_t read_hpet(void) +static cycle_t read_hpet(struct clocksource *cs) { return (cycle_t)hpet_readl(HPET_COUNTER); } @@ -756,7 +756,7 @@ static int hpet_clocksource_register(void) hpet_restart_counter(); /* Verify whether hpet counter works */ - t1 = read_hpet(); + t1 = hpet_readl(HPET_COUNTER); rdtscll(start); /* @@ -770,7 +770,7 @@ static int hpet_clocksource_register(void) rdtscll(now); } while ((now - start) < 200000UL); - if (t1 == read_hpet()) { + if (t1 == hpet_readl(HPET_COUNTER)) { printk(KERN_WARNING "HPET counter not counting. HPET disabled\n"); return -ENODEV; diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 3475440baa54..c2e0bb0890d4 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -129,7 +129,7 @@ void __init setup_pit_timer(void) * to just read by itself. So use jiffies to emulate a free * running counter: */ -static cycle_t pit_read(void) +static cycle_t pit_read(struct clocksource *cs) { static int old_count; static u32 old_jifs; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 137f2e8132df..223af43f1526 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -77,6 +77,11 @@ static cycle_t kvm_clock_read(void) return ret; } +static cycle_t kvm_clock_get_cycles(struct clocksource *cs) +{ + return kvm_clock_read(); +} + /* * If we don't do that, there is the possibility that the guest * will calibrate under heavy load - thus, getting a lower lpj - @@ -107,7 +112,7 @@ static void kvm_get_preset_lpj(void) static struct clocksource kvm_clock = { .name = "kvm-clock", - .read = kvm_clock_read, + .read = kvm_clock_get_cycles, .rating = 400, .mask = CLOCKSOURCE_MASK(64), .mult = 1 << KVM_SCALE, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7a567ebe6361..d57de05dc430 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -699,7 +699,7 @@ static struct clocksource clocksource_tsc; * code, which is necessary to support wrapping clocksources like pm * timer. */ -static cycle_t read_tsc(void) +static cycle_t read_tsc(struct clocksource *cs) { cycle_t ret = (cycle_t)get_cycles(); diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index d303369a7bad..2b3eb82efeeb 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -283,7 +283,7 @@ void __devinit vmi_time_ap_init(void) /** vmi clocksource */ static struct clocksource clocksource_vmi; -static cycle_t read_real_cycles(void) +static cycle_t read_real_cycles(struct clocksource *cs) { cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); return max(ret, clocksource_vmi.cycle_last); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a2085368a3dc..ca7ec44bafc3 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -663,7 +663,7 @@ static unsigned long lguest_tsc_khz(void) /* If we can't use the TSC, the kernel falls back to our lower-priority * "lguest_clock", where we read the time value given to us by the Host. */ -static cycle_t lguest_clock_read(void) +static cycle_t lguest_clock_read(struct clocksource *cs) { unsigned long sec, nsec; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 14f240623497..0a5aa44299a5 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -213,6 +213,11 @@ cycle_t xen_clocksource_read(void) return ret; } +static cycle_t xen_clocksource_get_cycles(struct clocksource *cs) +{ + return xen_clocksource_read(); +} + static void xen_read_wallclock(struct timespec *ts) { struct shared_info *s = HYPERVISOR_shared_info; @@ -241,7 +246,7 @@ int xen_set_wallclock(unsigned long now) static struct clocksource xen_clocksource __read_mostly = { .name = "xen", .rating = 400, - .read = xen_clocksource_read, + .read = xen_clocksource_get_cycles, .mask = ~0, .mult = 1< value1) diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c index 8615059a8729..64e528e8bfa6 100644 --- a/drivers/clocksource/cyclone.c +++ b/drivers/clocksource/cyclone.c @@ -19,7 +19,7 @@ int use_cyclone = 0; static void __iomem *cyclone_ptr; -static cycle_t read_cyclone(void) +static cycle_t read_cyclone(struct clocksource *cs) { return (cycle_t)readl(cyclone_ptr); } diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c index b92da677aa5d..27f4d9637b62 100644 --- a/drivers/clocksource/scx200_hrt.c +++ b/drivers/clocksource/scx200_hrt.c @@ -43,7 +43,7 @@ MODULE_PARM_DESC(ppm, "+-adjust to actual XO freq (ppm)"); /* The base timer frequency, * 27 if selected */ #define HRT_FREQ 1000000 -static cycle_t read_hrt(void) +static cycle_t read_hrt(struct clocksource *cs) { /* Read the timer value */ return (cycle_t) inl(scx200_cb_base + SCx200_TIMER_OFFSET); diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 254f1064d973..01b886e68822 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -39,7 +39,7 @@ static void __iomem *tcaddr; -static cycle_t tc_get_cycles(void) +static cycle_t tc_get_cycles(struct clocksource *cs) { unsigned long flags; u32 lower, upper; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 573819ef4cc0..0d96cde9ee5d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -143,7 +143,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * 400-499: Perfect * The ideal clocksource. A must-use where * available. - * @read: returns a cycle value + * @read: returns a cycle value, passes clocksource as argument * @mask: bitmask for two's complement * subtraction of non 64 bit counters * @mult: cycle to nanosecond multiplier (adjusted by NTP) @@ -162,7 +162,7 @@ struct clocksource { char *name; struct list_head list; int rating; - cycle_t (*read)(void); + cycle_t (*read)(struct clocksource *cs); cycle_t mask; u32 mult; u32 mult_orig; @@ -271,7 +271,7 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) */ static inline cycle_t clocksource_read(struct clocksource *cs) { - return cs->read(); + return cs->read(cs); } /** diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c46c931a7fe7..ecfd7b5187e0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -181,12 +181,12 @@ static void clocksource_watchdog(unsigned long data) resumed = test_and_clear_bit(0, &watchdog_resumed); - wdnow = watchdog->read(); + wdnow = watchdog->read(watchdog); wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { - csnow = cs->read(); + csnow = cs->read(cs); if (unlikely(resumed)) { cs->wd_last = csnow; @@ -247,7 +247,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) list_add(&cs->wd_list, &watchdog_list); if (!started && watchdog) { - watchdog_last = watchdog->read(); + watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); @@ -268,7 +268,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) cse->flags &= ~CLOCK_SOURCE_WATCHDOG; /* Start if list is not empty */ if (!list_empty(&watchdog_list)) { - watchdog_last = watchdog->read(); + watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 06f197560f3b..c3f6c30816e3 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -50,7 +50,7 @@ */ #define JIFFIES_SHIFT 8 -static cycle_t jiffies_read(void) +static cycle_t jiffies_read(struct clocksource *cs) { return (cycle_t) jiffies; } -- cgit v1.3-8-gc7d7 From 4614e6adafa2c5e6c3a9c245af2807fa7bc5117a Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 21 Apr 2009 12:24:02 -0700 Subject: clocksource: add enable() and disable() callbacks Add enable() and disable() callbacks for clocksources. This allows us to put unused clocksources in power save mode. The functions clocksource_enable() and clocksource_disable() wrap the callbacks and are inserted in the timekeeping code to enable before use and disable after switching to a new clocksource. Signed-off-by: Magnus Damm Acked-by: John Stultz Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clocksource.h | 31 +++++++++++++++++++++++++++++++ kernel/time/timekeeping.c | 12 +++++++++--- 2 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0d96cde9ee5d..5a40d14daa9f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -144,6 +144,8 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * The ideal clocksource. A must-use where * available. * @read: returns a cycle value, passes clocksource as argument + * @enable: optional function to enable the clocksource + * @disable: optional function to disable the clocksource * @mask: bitmask for two's complement * subtraction of non 64 bit counters * @mult: cycle to nanosecond multiplier (adjusted by NTP) @@ -163,6 +165,8 @@ struct clocksource { struct list_head list; int rating; cycle_t (*read)(struct clocksource *cs); + int (*enable)(struct clocksource *cs); + void (*disable)(struct clocksource *cs); cycle_t mask; u32 mult; u32 mult_orig; @@ -274,6 +278,33 @@ static inline cycle_t clocksource_read(struct clocksource *cs) return cs->read(cs); } +/** + * clocksource_enable: - enable clocksource + * @cs: pointer to clocksource + * + * Enables the specified clocksource. The clocksource callback + * function should start up the hardware and setup mult and field + * members of struct clocksource to reflect hardware capabilities. + */ +static inline int clocksource_enable(struct clocksource *cs) +{ + return cs->enable ? cs->enable(cs) : 0; +} + +/** + * clocksource_disable: - disable clocksource + * @cs: pointer to clocksource + * + * Disables the specified clocksource. The clocksource callback + * function should power down the now unused hardware block to + * save power. + */ +static inline void clocksource_disable(struct clocksource *cs) +{ + if (cs->disable) + cs->disable(cs); +} + /** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 900f1b6598d1..687dff49f6e7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -182,7 +182,7 @@ EXPORT_SYMBOL(do_settimeofday); */ static void change_clocksource(void) { - struct clocksource *new; + struct clocksource *new, *old; new = clocksource_get_next(); @@ -191,11 +191,16 @@ static void change_clocksource(void) clocksource_forward_now(); - new->raw_time = clock->raw_time; + if (clocksource_enable(new)) + return; + new->raw_time = clock->raw_time; + old = clock; clock = new; + clocksource_disable(old); + clock->cycle_last = 0; - clock->cycle_last = clocksource_read(new); + clock->cycle_last = clocksource_read(clock); clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -292,6 +297,7 @@ void __init timekeeping_init(void) ntp_init(); clock = clocksource_get_next(); + clocksource_enable(clock); clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); clock->cycle_last = clocksource_read(clock); -- cgit v1.3-8-gc7d7 From 40112ae7504745799e75ef418057f0d2cb745050 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 21 Apr 2009 12:24:03 -0700 Subject: ipmi: test for event buffer before using The IPMI driver would attempt to use the event buffer even if that didn't exist on the BMC. This patch modified the IPMI driver to check for the event buffer's existence before trying to use it. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_si_intf.c | 148 +++++++++++++++++++++++++++++++-------- include/linux/ipmi_msgdefs.h | 6 ++ 2 files changed, 125 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 2438fdf889b4..259644646b82 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -82,12 +82,6 @@ #define SI_SHORT_TIMEOUT_USEC 250 /* .25ms when the SM request a short timeout */ -/* Bit for BMC global enables. */ -#define IPMI_BMC_RCV_MSG_INTR 0x01 -#define IPMI_BMC_EVT_MSG_INTR 0x02 -#define IPMI_BMC_EVT_MSG_BUFF 0x04 -#define IPMI_BMC_SYS_LOG 0x08 - enum si_intf_state { SI_NORMAL, SI_GETTING_FLAGS, @@ -220,6 +214,9 @@ struct smi_info { OEM2_DATA_AVAIL) unsigned char msg_flags; + /* Does the BMC have an event buffer? */ + char has_event_buffer; + /* * If set to true, this will request events the next time the * state machine is idle. @@ -968,7 +965,8 @@ static void request_events(void *send_info) { struct smi_info *smi_info = send_info; - if (atomic_read(&smi_info->stop_operation)) + if (atomic_read(&smi_info->stop_operation) || + !smi_info->has_event_buffer) return; atomic_set(&smi_info->req_events, 1); @@ -2407,26 +2405,9 @@ static struct of_platform_driver ipmi_of_platform_driver = { }; #endif /* CONFIG_PPC_OF */ - -static int try_get_dev_id(struct smi_info *smi_info) +static int wait_for_msg_done(struct smi_info *smi_info) { - unsigned char msg[2]; - unsigned char *resp; - unsigned long resp_len; enum si_sm_result smi_result; - int rv = 0; - - resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL); - if (!resp) - return -ENOMEM; - - /* - * Do a Get Device ID command, since it comes back with some - * useful info. - */ - msg[0] = IPMI_NETFN_APP_REQUEST << 2; - msg[1] = IPMI_GET_DEVICE_ID_CMD; - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); smi_result = smi_info->handlers->event(smi_info->si_sm, 0); for (;;) { @@ -2441,16 +2422,39 @@ static int try_get_dev_id(struct smi_info *smi_info) } else break; } - if (smi_result == SI_SM_HOSED) { + if (smi_result == SI_SM_HOSED) /* * We couldn't get the state machine to run, so whatever's at * the port is probably not an IPMI SMI interface. */ - rv = -ENODEV; + return -ENODEV; + + return 0; +} + +static int try_get_dev_id(struct smi_info *smi_info) +{ + unsigned char msg[2]; + unsigned char *resp; + unsigned long resp_len; + int rv = 0; + + resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + /* + * Do a Get Device ID command, since it comes back with some + * useful info. + */ + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_GET_DEVICE_ID_CMD; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + + rv = wait_for_msg_done(smi_info); + if (rv) goto out; - } - /* Otherwise, we got some data. */ resp_len = smi_info->handlers->get_result(smi_info->si_sm, resp, IPMI_MAX_MSG_LENGTH); @@ -2462,6 +2466,88 @@ static int try_get_dev_id(struct smi_info *smi_info) return rv; } +static int try_enable_event_buffer(struct smi_info *smi_info) +{ + unsigned char msg[3]; + unsigned char *resp; + unsigned long resp_len; + int rv = 0; + + resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + + rv = wait_for_msg_done(smi_info); + if (rv) { + printk(KERN_WARNING + "ipmi_si: Error getting response from get global," + " enables command, the event buffer is not" + " enabled.\n"); + goto out; + } + + resp_len = smi_info->handlers->get_result(smi_info->si_sm, + resp, IPMI_MAX_MSG_LENGTH); + + if (resp_len < 4 || + resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || + resp[1] != IPMI_GET_BMC_GLOBAL_ENABLES_CMD || + resp[2] != 0) { + printk(KERN_WARNING + "ipmi_si: Invalid return from get global" + " enables command, cannot enable the event" + " buffer.\n"); + rv = -EINVAL; + goto out; + } + + if (resp[3] & IPMI_BMC_EVT_MSG_BUFF) + /* buffer is already enabled, nothing to do. */ + goto out; + + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD; + msg[2] = resp[3] | IPMI_BMC_EVT_MSG_BUFF; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + + rv = wait_for_msg_done(smi_info); + if (rv) { + printk(KERN_WARNING + "ipmi_si: Error getting response from set global," + " enables command, the event buffer is not" + " enabled.\n"); + goto out; + } + + resp_len = smi_info->handlers->get_result(smi_info->si_sm, + resp, IPMI_MAX_MSG_LENGTH); + + if (resp_len < 3 || + resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || + resp[1] != IPMI_SET_BMC_GLOBAL_ENABLES_CMD) { + printk(KERN_WARNING + "ipmi_si: Invalid return from get global," + "enables command, not enable the event" + " buffer.\n"); + rv = -EINVAL; + goto out; + } + + if (resp[2] != 0) + /* + * An error when setting the event buffer bit means + * that the event buffer is not supported. + */ + rv = -ENOENT; + out: + kfree(resp); + return rv; +} + static int type_file_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -2847,6 +2933,10 @@ static int try_smi_init(struct smi_info *new_smi) new_smi->intf_num = smi_num; smi_num++; + rv = try_enable_event_buffer(new_smi); + if (rv == 0) + new_smi->has_event_buffer = 1; + /* * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index b56a158d587a..a079f586e907 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -58,6 +58,12 @@ #define IPMI_READ_EVENT_MSG_BUFFER_CMD 0x35 #define IPMI_GET_CHANNEL_INFO_CMD 0x42 +/* Bit for BMC global enables. */ +#define IPMI_BMC_RCV_MSG_INTR 0x01 +#define IPMI_BMC_EVT_MSG_INTR 0x02 +#define IPMI_BMC_EVT_MSG_BUFF 0x04 +#define IPMI_BMC_SYS_LOG 0x08 + #define IPMI_NETFN_STORAGE_REQUEST 0x0a #define IPMI_NETFN_STORAGE_RESPONSE 0x0b #define IPMI_ADD_SEL_ENTRY_CMD 0x44 -- cgit v1.3-8-gc7d7 From 4dec302ff71ebf48f5784a2d2fc5e3745e6d4d52 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Tue, 21 Apr 2009 12:24:05 -0700 Subject: ipmi: add oem message handling Enable userspace to receive messages that a BMC transmits using an OEM medium. This is used by the HP iLO2. Based on code originally written by Patrick Schoeller. Signed-off-by: dann frazier Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 138 ++++++++++++++++++++++++++++++++++-- include/linux/ipmi.h | 2 + include/linux/ipmi_msgdefs.h | 2 + 3 files changed, 137 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 83c7477ba801..aa83a0865ec1 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3284,6 +3284,114 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, return rv; } +/* + * This routine will handle "Get Message" command responses with + * channels that use an OEM Medium. The message format belongs to + * the OEM. See IPMI 2.0 specification, Chapter 6 and + * Chapter 22, sections 22.6 and 22.24 for more details. + */ +static int handle_oem_get_msg_cmd(ipmi_smi_t intf, + struct ipmi_smi_msg *msg) +{ + struct cmd_rcvr *rcvr; + int rv = 0; + unsigned char netfn; + unsigned char cmd; + unsigned char chan; + ipmi_user_t user = NULL; + struct ipmi_system_interface_addr *smi_addr; + struct ipmi_recv_msg *recv_msg; + + /* + * We expect the OEM SW to perform error checking + * so we just do some basic sanity checks + */ + if (msg->rsp_size < 4) { + /* Message not big enough, just ignore it. */ + ipmi_inc_stat(intf, invalid_commands); + return 0; + } + + if (msg->rsp[2] != 0) { + /* An error getting the response, just ignore it. */ + return 0; + } + + /* + * This is an OEM Message so the OEM needs to know how + * handle the message. We do no interpretation. + */ + netfn = msg->rsp[0] >> 2; + cmd = msg->rsp[1]; + chan = msg->rsp[3] & 0xf; + + rcu_read_lock(); + rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); + if (rcvr) { + user = rcvr->user; + kref_get(&user->refcount); + } else + user = NULL; + rcu_read_unlock(); + + if (user == NULL) { + /* We didn't find a user, just give up. */ + ipmi_inc_stat(intf, unhandled_commands); + + /* + * Don't do anything with these messages, just allow + * them to be freed. + */ + + rv = 0; + } else { + /* Deliver the message to the user. */ + ipmi_inc_stat(intf, handled_commands); + + recv_msg = ipmi_alloc_recv_msg(); + if (!recv_msg) { + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. + */ + rv = 1; + kref_put(&user->refcount, free_user); + } else { + /* + * OEM Messages are expected to be delivered via + * the system interface to SMS software. We might + * need to visit this again depending on OEM + * requirements + */ + smi_addr = ((struct ipmi_system_interface_addr *) + &(recv_msg->addr)); + smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr->channel = IPMI_BMC_CHANNEL; + smi_addr->lun = msg->rsp[0] & 3; + + recv_msg->user = user; + recv_msg->user_msg_data = NULL; + recv_msg->recv_type = IPMI_OEM_RECV_TYPE; + recv_msg->msg.netfn = msg->rsp[0] >> 2; + recv_msg->msg.cmd = msg->rsp[1]; + recv_msg->msg.data = recv_msg->msg_data; + + /* + * The message starts at byte 4 which follows the + * the Channel Byte in the "GET MESSAGE" command + */ + recv_msg->msg.data_len = msg->rsp_size - 4; + memcpy(recv_msg->msg_data, + &(msg->rsp[4]), + msg->rsp_size - 4); + deliver_response(recv_msg); + } + } + + return rv; +} + static void copy_event_into_recv_msg(struct ipmi_recv_msg *recv_msg, struct ipmi_smi_msg *msg) { @@ -3539,6 +3647,17 @@ static int handle_new_recv_msg(ipmi_smi_t intf, goto out; } + /* + ** We need to make sure the channels have been initialized. + ** The channel_handler routine will set the "curr_channel" + ** equal to or greater than IPMI_MAX_CHANNELS when all the + ** channels for this interface have been initialized. + */ + if (intf->curr_channel < IPMI_MAX_CHANNELS) { + requeue = 1; /* Just put the message back for now */ + goto out; + } + switch (intf->channels[chan].medium) { case IPMI_CHANNEL_MEDIUM_IPMB: if (msg->rsp[4] & 0x04) { @@ -3574,11 +3693,20 @@ static int handle_new_recv_msg(ipmi_smi_t intf, break; default: - /* - * We don't handle the channel type, so just - * free the message. - */ - requeue = 0; + /* Check for OEM Channels. Clients had better + register for these commands. */ + if ((intf->channels[chan].medium + >= IPMI_CHANNEL_MEDIUM_OEM_MIN) + && (intf->channels[chan].medium + <= IPMI_CHANNEL_MEDIUM_OEM_MAX)) { + requeue = handle_oem_get_msg_cmd(intf, msg); + } else { + /* + * We don't handle the channel type, so just + * free the message. + */ + requeue = 0; + } } } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 7ebdb4fb4e54..65aae34759de 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -198,6 +198,8 @@ struct kernel_ipmi_msg { response. When you send a response message, this will be returned. */ +#define IPMI_OEM_RECV_TYPE 5 /* The response for OEM Channels */ + /* Note that async events and received commands do not have a completion code as the first byte of the incoming data, unlike a response. */ diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index a079f586e907..df97e6e31e87 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -115,5 +115,7 @@ #define IPMI_CHANNEL_MEDIUM_USB1 10 #define IPMI_CHANNEL_MEDIUM_USB2 11 #define IPMI_CHANNEL_MEDIUM_SYSINTF 12 +#define IPMI_CHANNEL_MEDIUM_OEM_MIN 0x60 +#define IPMI_CHANNEL_MEDIUM_OEM_MAX 0x7f #endif /* __LINUX_IPMI_MSGDEFS_H */ -- cgit v1.3-8-gc7d7 From e638c1394010859a015a3b533ee452d768e62cea Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 21 Apr 2009 12:24:41 -0700 Subject: memcg: use rcu_dereference to access mm->owner mm->owner should be accessed with rcu_dereference(). Reported-by: KOSAKI Motohiro Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 18146c980b68..a9e3b76aa884 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -75,7 +75,7 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) { struct mem_cgroup *mem; rcu_read_lock(); - mem = mem_cgroup_from_task((mm)->owner); + mem = mem_cgroup_from_task(rcu_dereference((mm)->owner)); rcu_read_unlock(); return cgroup == mem; } -- cgit v1.3-8-gc7d7 From 6e538aaf50ae782a890cbc02c27950448d8193e1 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 21 Apr 2009 12:24:49 -0700 Subject: spi: documentation: emphasise spi_master.setup() semantics This is a doc-only patch which I hope will reduce the number of spi_master controller driver patches starting out with a common implementation bug. (As in: almost every spi_master driver I see starts out with its version of this bug. Sigh.) It just re-emphasizes that the setup() method may be called for one device while a transfer is active on another ... which means that most driver implementations shouldn't touch any registers. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/spi/spi-summary | 6 ++++++ include/linux/spi/spi.h | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary index 0f5122eb282b..4a02d2508bc8 100644 --- a/Documentation/spi/spi-summary +++ b/Documentation/spi/spi-summary @@ -511,10 +511,16 @@ SPI MASTER METHODS This sets up the device clock rate, SPI mode, and word sizes. Drivers may change the defaults provided by board_info, and then call spi_setup(spi) to invoke this routine. It may sleep. + Unless each SPI slave has its own configuration registers, don't change them right away ... otherwise drivers could corrupt I/O that's in progress for other SPI devices. + ** BUG ALERT: for some reason the first version of + ** many spi_master drivers seems to get this wrong. + ** When you code setup(), ASSUME that the controller + ** is actively processing transfers for another device. + master->transfer(struct spi_device *spi, struct spi_message *message) This must not sleep. Its responsibility is arrange that the transfer happens and its complete() callback is issued. The two diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 2cc43fa380cb..a0faa18f7b1b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -245,7 +245,12 @@ struct spi_master { */ u16 dma_alignment; - /* setup mode and clock, etc (spi driver may call many times) */ + /* Setup mode and clock, etc (spi driver may call many times). + * + * IMPORTANT: this may be called when transfers to another + * device are active. DO NOT UPDATE SHARED REGISTERS in ways + * which could break those transfers. + */ int (*setup)(struct spi_device *spi); /* bidirectional bulk transfers -- cgit v1.3-8-gc7d7 From 9b8de7479d0dbab1ed98b5b015d44232c9d3d08e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Apr 2009 23:00:24 +0100 Subject: FRV: Fix the section attribute on UP DECLARE_PER_CPU() In non-SMP mode, the variable section attribute specified by DECLARE_PER_CPU() does not agree with that specified by DEFINE_PER_CPU(). This means that architectures that have a small data section references relative to a base register may throw up linkage errors due to too great a displacement between where the base register points and the per-CPU variable. On FRV, the .h declaration says that the variable is in the .sdata section, but the .c definition says it's actually in the .data section. The linker throws up the following errors: kernel/built-in.o: In function `release_task': kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o To fix this, DECLARE_PER_CPU() should simply apply the same section attribute as does DEFINE_PER_CPU(). However, this is made slightly more complex by virtue of the fact that there are several variants on DEFINE, so these need to be matched by variants on DECLARE. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/percpu.h | 2 +- arch/ia64/include/asm/smp.h | 2 +- arch/x86/include/asm/desc.h | 2 +- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/processor.h | 6 +++--- arch/x86/include/asm/tlbflush.h | 2 +- include/asm-generic/percpu.h | 43 ++++++++++++++++++++++++++++++++++++++-- include/linux/percpu.h | 24 ---------------------- net/rds/rds.h | 2 +- 9 files changed, 50 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h index 3495e8e00d70..e9e0bb5a23bf 100644 --- a/arch/alpha/include/asm/percpu.h +++ b/arch/alpha/include/asm/percpu.h @@ -73,6 +73,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #endif /* SMP */ -#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name) +#include #endif /* __ALPHA_PERCPU_H */ diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index 598408336251..d217d1d4e051 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -58,7 +58,7 @@ extern struct smp_boot_data { extern char no_int_routing __devinitdata; extern cpumask_t cpu_core_map[NR_CPUS]; -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); extern int smp_num_siblings; extern void __iomem *ipi_base_addr; extern unsigned char smp_int_redirect; diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 5623c50d67b2..c45f415ce315 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -37,7 +37,7 @@ extern gate_desc idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU(struct gdt_page, gdt_page); +DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 039db6aa8e02..37555e52f980 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -26,7 +26,7 @@ typedef struct { #endif } ____cacheline_aligned irq_cpustat_t; -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fcf4d92e7e04..c2cceae709c8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -138,7 +138,7 @@ extern struct tss_struct doublefault_tss; extern __u32 cleared_cpu_caps[NCAPINTS]; #ifdef CONFIG_SMP -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) #define current_cpu_data __get_cpu_var(cpu_info) #else @@ -270,7 +270,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU(struct tss_struct, init_tss); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); /* * Save the original ist values for checking stack pointers during debugging @@ -393,7 +393,7 @@ union irq_stack_union { }; }; -DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d3539f998f88..16a5c84b0329 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -152,7 +152,7 @@ struct tlb_state { struct mm_struct *active_mm; int state; }; -DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); static inline void reset_lazy_tlbstate(void) { diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672ebd..af47b9e10064 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -73,11 +73,50 @@ extern void setup_per_cpu_areas(void); #endif /* SMP */ +#ifndef PER_CPU_BASE_SECTION +#ifdef CONFIG_SMP +#define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP + +#ifdef MODULE +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#else +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" +#endif +#define PER_CPU_FIRST_SECTION ".first" + +#else + +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif + #ifndef PER_CPU_ATTRIBUTES #define PER_CPU_ATTRIBUTES #endif -#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ - __typeof__(type) per_cpu_var(name) +#define DECLARE_PER_CPU_SECTION(type, name, section) \ + extern \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DECLARE_PER_CPU(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "") + +#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DECLARE_PER_CPU_FIRST(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cfda2d5ad319..f052d8184993 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,30 +9,6 @@ #include -#ifndef PER_CPU_BASE_SECTION -#ifdef CONFIG_SMP -#define PER_CPU_BASE_SECTION ".data.percpu" -#else -#define PER_CPU_BASE_SECTION ".data" -#endif -#endif - -#ifdef CONFIG_SMP - -#ifdef MODULE -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#else -#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" -#endif -#define PER_CPU_FIRST_SECTION ".first" - -#else - -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_FIRST_SECTION "" - -#endif - #define DEFINE_PER_CPU_SECTION(type, name, section) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e5..71794449ca4e 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -638,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *, void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); /* stats.c */ -DECLARE_PER_CPU(struct rds_statistics, rds_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ -- cgit v1.3-8-gc7d7 From 5028eaa97dd1dab9cd7c30c4d38f71c708ca64bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Apr 2009 23:00:29 +0100 Subject: PERCPU: Collect the DECLARE/DEFINE declarations together Collect the DECLARE/DEFINE declarations together in linux/percpu-defs.h so that they're in one place, and give them descriptive comments, particularly the SHARED_ALIGNED variant. It would be nice to collect these in linux/percpu.h, but that's not possible without sorting out the severe #include recursion between the x86 arch headers and the general headers (and possibly other arches too). Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-generic/percpu.h | 26 ++------------ include/linux/percpu-defs.h | 84 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/percpu.h | 20 ----------- 3 files changed, 86 insertions(+), 44 deletions(-) create mode 100644 include/linux/percpu-defs.h (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index af47b9e10064..d7d50d7ee51e 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -1,13 +1,9 @@ #ifndef _ASM_GENERIC_PERCPU_H_ #define _ASM_GENERIC_PERCPU_H_ + #include #include - -/* - * Determine the real variable name from the name visible in the - * kernel sources. - */ -#define per_cpu_var(var) per_cpu__##var +#include #ifdef CONFIG_SMP @@ -101,22 +97,4 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_ATTRIBUTES #endif -#define DECLARE_PER_CPU_SECTION(type, name, section) \ - extern \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DECLARE_PER_CPU(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, "") - -#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - -#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") - -#define DECLARE_PER_CPU_FIRST(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) - #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h new file mode 100644 index 000000000000..8f921d74f49f --- /dev/null +++ b/include/linux/percpu-defs.h @@ -0,0 +1,84 @@ +#ifndef _LINUX_PERCPU_DEFS_H +#define _LINUX_PERCPU_DEFS_H + +/* + * Determine the real variable name from the name visible in the + * kernel sources. + */ +#define per_cpu_var(var) per_cpu__##var + +/* + * Base implementations of per-CPU variable declarations and definitions, where + * the section in which the variable is to be placed is provided by the + * 'section' argument. This may be used to affect the parameters governing the + * variable's storage. + * + * NOTE! The sections for the DECLARE and for the DEFINE must match, lest + * linkage errors occur due the compiler generating the wrong code to access + * that section. + */ +#define DECLARE_PER_CPU_SECTION(type, name, section) \ + extern \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, section) \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +/* + * Variant on the per-CPU variable declaration/definition theme used for + * ordinary per-CPU variables. + */ +#define DECLARE_PER_CPU(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "") + +#define DEFINE_PER_CPU(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "") + +/* + * Declaration/definition used for per-CPU variables that must come first in + * the set of variables. + */ +#define DECLARE_PER_CPU_FIRST(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) + +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) + +/* + * Declaration/definition used for per-CPU variables that must be cacheline + * aligned under SMP conditions so that, whilst a particular instance of the + * data corresponds to a particular CPU, inefficiencies due to direct access by + * other CPUs are reduced by preventing the data from unnecessarily spanning + * cachelines. + * + * An example of this would be statistical data, where each CPU's set of data + * is updated by that CPU alone, but the data from across all CPUs is collated + * by a CPU processing a read from a proc file. + */ +#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +/* + * Declaration/definition used for per-CPU variables that must be page aligned. + */ +#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") + +/* + * Intermodule exports for per-CPU variables. + */ +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) + + +#endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f052d8184993..1581ff235c7e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,26 +9,6 @@ #include -#define DEFINE_PER_CPU_SECTION(type, name, section) \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, "") - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") - -#define DEFINE_PER_CPU_FIRST(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) - -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) - /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES #define PERCPU_MODULE_RESERVE (8 << 10) -- cgit v1.3-8-gc7d7 From 5dd559f020c98a2a4b3e063f09c0e4bc771ed838 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 21 Apr 2009 16:30:32 -0600 Subject: Trivial: fix a typo in slow-work.h Fix a comment typo in slow-work.h ...a trivial mistake, but it will mess up kerneldoc if nothing else. Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- include/linux/slow-work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 85958277f83d..b65c8881f07a 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -67,7 +67,7 @@ static inline void slow_work_init(struct slow_work *work, } /** - * slow_work_init - Initialise a very slow work item + * vslow_work_init - Initialise a very slow work item * @work: The work item to initialise * @ops: The operations to use to handle the slow work item * -- cgit v1.3-8-gc7d7 From d4d5291c8cd499b1b590336059d5cc3e24c1ced6 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 21 Apr 2009 13:32:54 -0700 Subject: driver synchronization: make scsi_wait_scan more advanced There is currently only one way for userspace to say "wait for my storage device to get ready for the modules I just loaded": to load the scsi_wait_scan module. Expectations of userspace are that once this module is loaded, all the (storage) devices for which the drivers were loaded before the module load are present. Now, there are some issues with the implementation, and the async stuff got caught in the middle of this: The existing code only waits for the scsy async probing to finish, but it did not take into account at all that probing might not have begun yet. (Russell ran into this problem on his computer and the fix works for him) This patch fixes this more thoroughly than the previous "fix", which had some bad side effects (namely, for kernel code that wanted to wait for the scsi scan it would also do an async sync, which would deadlock if you did it from async context already.. there's a report about that on lkml): The patch makes the module first wait for all device driver probes, and then it will wait for the scsi parallel scan to finish. Signed-off-by: Arjan van de Ven Tested-by: Russell King Signed-off-by: Linus Torvalds --- drivers/base/dd.c | 1 + drivers/scsi/scsi_scan.c | 2 -- drivers/scsi/scsi_wait_scan.c | 11 +++++++++++ include/linux/device.h | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f17c3266a0e0..742cbe6b042b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -179,6 +179,7 @@ void wait_for_device_probe(void) wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); } +EXPORT_SYMBOL_GPL(wait_for_device_probe); /** * driver_probe_device - attempt to bind device & driver together diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index a14d245a66b8..6f51ca485f35 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -180,8 +180,6 @@ int scsi_complete_async_scans(void) spin_unlock(&async_scan_lock); kfree(data); - /* Synchronize async operations globally */ - async_synchronize_full(); return 0; } diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c index 2f21af21269a..74708fcaf82f 100644 --- a/drivers/scsi/scsi_wait_scan.c +++ b/drivers/scsi/scsi_wait_scan.c @@ -11,10 +11,21 @@ */ #include +#include #include static int __init wait_scan_init(void) { + /* + * First we need to wait for device probing to finish; + * the drivers we just loaded might just still be probing + * and might not yet have reached the scsi async scanning + */ + wait_for_device_probe(); + /* + * and then we wait for the actual asynchronous scsi scan + * to finish. + */ scsi_complete_async_scans(); return 0; } diff --git a/include/linux/device.h b/include/linux/device.h index 2918c0e8fdfd..6a69caaac18a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -551,6 +551,7 @@ extern int (*platform_notify_remove)(struct device *dev); extern struct device *get_device(struct device *dev); extern void put_device(struct device *dev); +extern void wait_for_device_probe(void); /* drivers/base/power/shutdown.c */ extern void device_shutdown(void); -- cgit v1.3-8-gc7d7 From 451a9ebf653d28337ba53ed5b4b70b0b9543cca1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Apr 2009 19:50:51 +0200 Subject: bio: fix bio_kmalloc() Impact: fix bio_kmalloc() and its destruction path bio_kmalloc() was broken in two ways. * bvec_alloc_bs() first allocates bvec using kmalloc() and then ignores it and allocates again like non-kmalloc bvecs. * bio_kmalloc_destructor() didn't check for and free bio integrity data. This patch fixes the above problems. kmalloc patch is separated out from bio_alloc_bioset() and allocates the requested number of bvecs as inline bvecs. * bio_alloc_bioset() no longer takes NULL @bs. None other than bio_kmalloc() used it and outside users can't know how it was allocated anyway. * Define and use BIO_POOL_NONE so that pool index check in bvec_free_bs() triggers if inline or kmalloc allocated bvec gets there. * Relocate destructors on top of each allocation function so that how they're used is more clear. Jens Axboe suggested allocating bvecs inline. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/bio.c | 118 ++++++++++++++++++++++++---------------------------- include/linux/bio.h | 1 + 2 files changed, 55 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/fs/bio.c b/fs/bio.c index cd42bb882f30..d35588fd6d57 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -174,14 +174,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, { struct bio_vec *bvl; - /* - * If 'bs' is given, lookup the pool and do the mempool alloc. - * If not, this is a bio_kmalloc() allocation and just do a - * kzalloc() for the exact number of vecs right away. - */ - if (!bs) - bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask); - /* * see comment near bvec_array define! */ @@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs) mempool_free(p, bs->bio_pool); } -/* - * default destructor for a bio allocated with bio_alloc_bioset() - */ -static void bio_fs_destructor(struct bio *bio) -{ - bio_free(bio, fs_bio_set); -} - -static void bio_kmalloc_destructor(struct bio *bio) -{ - if (bio_has_allocated_vec(bio)) - kfree(bio->bi_io_vec); - kfree(bio); -} - void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); @@ -301,21 +278,15 @@ void bio_init(struct bio *bio) **/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + unsigned long idx = BIO_POOL_NONE; struct bio_vec *bvl = NULL; - struct bio *bio = NULL; - unsigned long idx = 0; - void *p = NULL; - - if (bs) { - p = mempool_alloc(bs->bio_pool, gfp_mask); - if (!p) - goto err; - bio = p + bs->front_pad; - } else { - bio = kmalloc(sizeof(*bio), gfp_mask); - if (!bio) - goto err; - } + struct bio *bio; + void *p; + + p = mempool_alloc(bs->bio_pool, gfp_mask); + if (unlikely(!p)) + return NULL; + bio = p + bs->front_pad; bio_init(bio); @@ -332,22 +303,50 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) nr_iovecs = bvec_nr_vecs(idx); } +out_set: bio->bi_flags |= idx << BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; -out_set: bio->bi_io_vec = bvl; - return bio; err_free: - if (bs) - mempool_free(p, bs->bio_pool); - else - kfree(bio); -err: + mempool_free(p, bs->bio_pool); return NULL; } +static void bio_fs_destructor(struct bio *bio) +{ + bio_free(bio, fs_bio_set); +} + +/** + * bio_alloc - allocate a new bio, memory pool backed + * @gfp_mask: allocation mask to use + * @nr_iovecs: number of iovecs + * + * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask + * contains __GFP_WAIT, the allocation is guaranteed to succeed. + * + * RETURNS: + * Pointer to new bio on success, NULL on failure. + */ +struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) +{ + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + + if (bio) + bio->bi_destructor = bio_fs_destructor; + + return bio; +} + +static void bio_kmalloc_destructor(struct bio *bio) +{ + if (bio_integrity(bio)) + bio_integrity_free(bio); + kfree(bio); +} + /** * bio_alloc - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator @@ -366,29 +365,20 @@ err: * do so can cause livelocks under memory pressure. * **/ -struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) -{ - struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); - - if (bio) - bio->bi_destructor = bio_fs_destructor; - - return bio; -} - -/* - * Like bio_alloc(), but doesn't use a mempool backing. This means that - * it CAN fail, but while bio_alloc() can only be used for allocations - * that have a short (finite) life span, bio_kmalloc() should be used - * for more permanent bio allocations (like allocating some bio's for - * initalization or setup purposes). - */ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) { - struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); + struct bio *bio; - if (bio) - bio->bi_destructor = bio_kmalloc_destructor; + bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), + gfp_mask); + if (unlikely(!bio)) + return NULL; + + bio_init(bio); + bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; + bio->bi_max_vecs = nr_iovecs; + bio->bi_io_vec = bio->bi_inline_vecs; + bio->bi_destructor = bio_kmalloc_destructor; return bio; } diff --git a/include/linux/bio.h b/include/linux/bio.h index b89cf2d82898..7b214fd672a2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -132,6 +132,7 @@ struct bio { * top 4 bits of bio flags indicate the pool this bio came from */ #define BIO_POOL_BITS (4) +#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) #define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) #define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) -- cgit v1.3-8-gc7d7 From 71982a409f12c50d011325a4471aa20666bb908d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Apr 2009 08:34:48 +0200 Subject: block: include empty disks in /proc/diskstats /proc/diskstats used to show stats for all disks whether they're zero-sized or not and their non-zero partitions. Commit 074a7aca7afa6f230104e8e65eba3420263714a5 accidentally changed the behavior such that it doesn't print out zero sized disks. This patch implements DISK_PITER_INCL_EMPTY_PART0 flag to partition iterator and uses it in diskstats_show() such that empty part0 is shown in /proc/diskstats. Reported and bisectd by Dianel Collins. Signed-off-by: Tejun Heo Reported-by: Daniel Collins Signed-off-by: Jens Axboe --- block/genhd.c | 12 ++++++++---- include/linux/genhd.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index a9ec910974c1..1a4916e01732 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -98,7 +98,7 @@ void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, if (flags & DISK_PITER_REVERSE) piter->idx = ptbl->len - 1; - else if (flags & DISK_PITER_INCL_PART0) + else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) piter->idx = 0; else piter->idx = 1; @@ -134,7 +134,8 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) /* determine iteration parameters */ if (piter->flags & DISK_PITER_REVERSE) { inc = -1; - if (piter->flags & DISK_PITER_INCL_PART0) + if (piter->flags & (DISK_PITER_INCL_PART0 | + DISK_PITER_INCL_EMPTY_PART0)) end = -1; else end = 0; @@ -150,7 +151,10 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) + if (!part->nr_sects && + !(piter->flags & DISK_PITER_INCL_EMPTY) && + !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && + piter->idx == 0)) continue; get_device(part_to_dev(part)); @@ -1011,7 +1015,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { cpu = part_stat_lock(); part_round_stats(cpu, hd); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 634c53028fb8..a1a28caed23d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -214,6 +214,7 @@ static inline void disk_put_part(struct hd_struct *part) #define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ #define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ #define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ +#define DISK_PITER_INCL_EMPTY_PART0 (1 << 3) /* include empty partition 0 */ struct disk_part_iter { struct gendisk *disk; -- cgit v1.3-8-gc7d7 From 4cd481f68dde99ac416003b825c835f71e364393 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 13 Apr 2009 11:59:32 +0200 Subject: KVM: Fix overlapping check for memory slots When checking for overlapping slots on registration of a new one, kvm currently also considers zero-length (ie. deleted) slots and rejects requests incorrectly. This finally denies user space from joining slots. Fix the check by skipping deleted slots and advertise this via a KVM_CAP_JOIN_MEMORY_REGIONS_WORKS. Cc: stable@kernel.org Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 ++ virt/kvm/kvm_main.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 311a073afe8a..8cc137911b34 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -409,6 +409,8 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 #endif +/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 28d693a1ee8f..1ecbe2391c8b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -961,7 +961,7 @@ int __kvm_set_memory_region(struct kvm *kvm, for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *s = &kvm->memslots[i]; - if (s == memslot) + if (s == memslot || !s->npages) continue; if (!((base_gfn + npages <= s->base_gfn) || (base_gfn >= s->base_gfn + s->npages))) @@ -1983,6 +1983,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) switch (arg) { case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: -- cgit v1.3-8-gc7d7 From 1b6b8ce2ac372ea1f2065b89228ede105eb68dc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 9 Apr 2009 14:57:39 +0800 Subject: PCI: only save/restore existent registers in the PCIe capability PCIe 1.1 base neither requires the endpoint to implement the entire PCIe capability structure nor specifies default values of registers that are not implemented by the device. So we only save and restore registers that must be implemented by different device types if the device PCIe capability version is 1. PCIe 1.1 Capability Structure Expansion ECN and PCIe 2.0 requires all registers in the PCIe capability to be either implemented or hardwired to 0. Their PCIe capability version is 2. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 70 ++++++++++++++++++++++++++++++++++++++---------- include/linux/pci_regs.h | 1 + 2 files changed, 57 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 16fd0d4c3166..34bf0fdf5047 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -681,11 +681,34 @@ EXPORT_SYMBOL(pci_choose_state); #define PCI_EXP_SAVE_REGS 7 +#define pcie_cap_has_devctl(type, flags) 1 +#define pcie_cap_has_lnkctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + (type == PCI_EXP_TYPE_ROOT_PORT || \ + type == PCI_EXP_TYPE_ENDPOINT || \ + type == PCI_EXP_TYPE_LEG_END)) +#define pcie_cap_has_sltctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + ((type == PCI_EXP_TYPE_ROOT_PORT) || \ + (type == PCI_EXP_TYPE_DOWNSTREAM && \ + (flags & PCI_EXP_FLAGS_SLOT)))) +#define pcie_cap_has_rtctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + (type == PCI_EXP_TYPE_ROOT_PORT || \ + type == PCI_EXP_TYPE_RC_EC)) +#define pcie_cap_has_devctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) +#define pcie_cap_has_lnkctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) +#define pcie_cap_has_sltctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) + static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; struct pci_cap_saved_state *save_state; u16 *cap; + u16 flags; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (pos <= 0) @@ -698,13 +721,22 @@ static int pci_save_pcie_state(struct pci_dev *dev) } cap = (u16 *)&save_state->data[0]; - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); + + if (pcie_cap_has_devctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &cap[i++]); + if (pcie_cap_has_lnkctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); + if (pcie_cap_has_sltctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); + if (pcie_cap_has_rtctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); + if (pcie_cap_has_devctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); + if (pcie_cap_has_lnkctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); + if (pcie_cap_has_sltctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); return 0; } @@ -714,6 +746,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev) int i = 0, pos; struct pci_cap_saved_state *save_state; u16 *cap; + u16 flags; save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); pos = pci_find_capability(dev, PCI_CAP_ID_EXP); @@ -721,13 +754,22 @@ static void pci_restore_pcie_state(struct pci_dev *dev) return; cap = (u16 *)&save_state->data[0]; - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); + + if (pcie_cap_has_devctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, cap[i++]); + if (pcie_cap_has_lnkctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); + if (pcie_cap_has_sltctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); + if (pcie_cap_has_rtctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); + if (pcie_cap_has_devctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); + if (pcie_cap_has_lnkctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); + if (pcie_cap_has_sltctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); } diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e4d08c1b2e0b..616bf8b3c8b5 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -376,6 +376,7 @@ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ #define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0x10 /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ -- cgit v1.3-8-gc7d7 From 952043ac12a117d8e94bddd9088338d7ad20ca7d Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Apr 2009 08:48:15 +0100 Subject: bitops: Add __ffs64 bitop Finds the first set bit in a 64 bit word. This is required in order to fix a bug in GFS2, but I think it should be a generic function in case of future users. Signed-off-by: Steven Whitehouse Reviewed-by: Christoph Lameter Reviewed-by: Willy Tarreau --- include/linux/bitops.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 61829139795a..c05a29cb9bb2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -112,6 +112,25 @@ static inline unsigned fls_long(unsigned long l) return fls64(l); } +/** + * __ffs64 - find first set bit in a 64 bit word + * @word: The 64 bit word + * + * On 64 bit arches this is a synomyn for __ffs + * The result is not defined if no bits are set, so check that @word + * is non-zero before calling this. + */ +static inline unsigned long __ffs64(u64 word) +{ +#if BITS_PER_LONG == 32 + if (((u32)word) == 0UL) + return __ffs((u32)(word >> 32)) + 32; +#elif BITS_PER_LONG != 64 +#error BITS_PER_LONG not 32 or 64 +#endif + return __ffs((unsigned long)word); +} + #ifdef __KERNEL__ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT -- cgit v1.3-8-gc7d7 From fbfc396efbc11d784b4325adfc02e82a0df01a8d Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Tue, 21 Apr 2009 20:52:54 -0700 Subject: USB: musb: Prevent multiple includes of musb.h Add #ifndef to musb header file to prevent multiple inclusions. Signed-off-by: Mark A. Greer Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/musb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index d6aad0ea6033..d43755669261 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -7,6 +7,9 @@ * key configuration differences between boards. */ +#ifndef __LINUX_USB_MUSB_H +#define __LINUX_USB_MUSB_H + /* The USB role is defined by the connector used on the board, so long as * standards are being followed. (Developer boards sometimes won't.) */ @@ -101,3 +104,5 @@ extern int __init tusb6010_setup_interface( extern int tusb6010_platform_retime(unsigned is_refclk); #endif /* OMAP2 */ + +#endif /* __LINUX_USB_MUSB_H */ -- cgit v1.3-8-gc7d7 From 097102c2d04974bdfcfa16a5f3062d499842139c Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Tue, 21 Apr 2009 09:33:14 +0200 Subject: pktcdvd.h should include mempool.h Fix this build error: In file included from fs/compat_ioctl.c:104: include/linux/pktcdvd.h:285: error: expected specifier-qualifier-list before 'mempool_t' Signed-off-by: Alexander Beregalov Signed-off-by: Jens Axboe --- include/linux/pktcdvd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 04b4d7330e6d..d745f5b6c7b0 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -113,6 +113,7 @@ struct pkt_ctrl_command { #include #include #include +#include /* default bio write queue congestion marks */ #define PKT_WRITE_CONGESTION_ON 10000 -- cgit v1.3-8-gc7d7 From 42dad7647aec49b3ad20dd0cb832b232a6ae514f Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 22 Apr 2009 14:01:49 +0200 Subject: block: simplify I/O stat accounting This simplifies I/O stat accounting switching code and separates it completely from I/O scheduler switch code. Requests are accounted according to the state of their request queue at the time of the request allocation. There is no need anymore to flush the request queue when switching I/O accounting state. Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- block/blk-core.c | 6 ++++-- block/blk-merge.c | 5 ++++- block/blk-sysfs.c | 4 ---- block/blk.h | 7 +------ include/linux/blkdev.h | 3 +++ 5 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 07ab75403e1a..2998fe3a2377 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) blk_rq_init(q, rq); - rq->cmd_flags = rw | REQ_ALLOCED; + rq->cmd_flags = flags | REQ_ALLOCED; if (priv) { if (unlikely(elv_set_request(q, rq, gfp_mask))) { @@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags, if (priv) rl->elvpriv++; + if (blk_queue_io_stat(q)) + rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); diff --git a/block/blk-merge.c b/block/blk-merge.c index 63760ca3da0f..23d2a6fe34a3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); - blk_account_io_merge(req); + /* + * 'next' is going away, so update stats accordingly + */ + blk_account_io_merge(next); req->ioprio = ioprio_best(req->ioprio, next->ioprio); if (blk_rq_cpu_valid(next)) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cac4e9febe6a..3ff9bba3379a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quiesce_start(q); - if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - - elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 5dfc41267a08..79c85f7c9ff5 100644 --- a/block/blk.h +++ b/block/blk.h @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu) static inline int blk_do_io_stat(struct request *rq) { - struct gendisk *disk = rq->rq_disk; - - if (!disk || !disk->queue) - return 0; - - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV); + return rq->rq_disk && blk_rq_io_stat(rq); } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba54c834a590..2755d5c6da22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ enum rq_flag_bits { __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NOIDLE, /* Don't anticipate more IO after this one */ + __REQ_IO_STAT, /* account I/O stat */ __REQ_NR_BITS, /* stops here */ }; @@ -145,6 +146,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_NOIDLE (1 << __REQ_NOIDLE) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) #define BLK_MAX_CDB 16 @@ -598,6 +600,7 @@ enum { blk_failfast_transport(rq) || \ blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) +#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.3-8-gc7d7 From 71951b64a5a87c09eb6fde59ce51aaab2fdaeab2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2009 16:58:41 +0200 Subject: netfilter: nf_ct_dccp: add missing role attributes for DCCP This patch adds missing role attribute to the DCCP type, otherwise the creation of entries is not of any use. The attribute added is CTA_PROTOINFO_DCCP_ROLE which contains the role of the conntrack original tuple. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 29fe9ea1d346..1a865e48b8eb 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -100,6 +100,7 @@ enum ctattr_protoinfo_tcp { enum ctattr_protoinfo_dccp { CTA_PROTOINFO_DCCP_UNSPEC, CTA_PROTOINFO_DCCP_STATE, + CTA_PROTOINFO_DCCP_ROLE, __CTA_PROTOINFO_DCCP_MAX, }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5411d63f31a5..8e757dd53396 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -633,6 +633,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -644,6 +646,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -661,11 +664,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + !tb[CTA_PROTOINFO_DCCP_ROLE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; + } write_lock_bh(&dccp_lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + } else { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; + } write_unlock_bh(&dccp_lock); return 0; } -- cgit v1.3-8-gc7d7 From c80d471a476b6d6fe0bc1fd25293c24c66b7aaaf Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sat, 25 Apr 2009 22:10:56 -0400 Subject: Add new HEAD_TEXT_SECTION macro. This patch is preparation for replacing all uses of ".head.text" or ".text.head" in the kernel with macros, so that the section name can later be changed without having to touch a lot of the kernel. Since some linker scripts do more complex things than referencing HEAD_TEXT, we add a HEAD_TEXT_SECTION macro that just contains the actual name. I've defined HEAD_TEXT_SECTION in a new header, include/linux/section-names.h, so that this section name only needs to appear in one place. I anticipate creating similar macro structures for a number of other section names. The long-term goal here is to be able to change the kernel's magic section names to those that are compatible with -ffunction-sections -fdata-sections. This requires renaming all magic sections with names of the form ".text.foo". Signed-off-by: Tim Abbott Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 4 +++- include/linux/init.h | 4 +++- include/linux/section-names.h | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 include/linux/section-names.h (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7fa660fd449c..eaa06ef6f7d9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1,3 +1,5 @@ +#include + #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 #endif @@ -331,7 +333,7 @@ #endif /* Section used for early init (in .S files) */ -#define HEAD_TEXT *(.head.text) +#define HEAD_TEXT *(HEAD_TEXT_SECTION) /* init and exit section handling */ #define INIT_DATA \ diff --git a/include/linux/init.h b/include/linux/init.h index f121a7a10c3d..20a1334e34e9 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -2,6 +2,8 @@ #define _LINUX_INIT_H #include +#include +#include /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) @@ -107,7 +109,7 @@ #define __memexitconst __section(.memexit.rodata) /* For assembly routines */ -#define __HEAD .section ".head.text","ax" +#define __HEAD .section __stringify(HEAD_TEXT_SECTION),"ax" #define __INIT .section ".init.text","ax" #define __FINIT .previous diff --git a/include/linux/section-names.h b/include/linux/section-names.h new file mode 100644 index 000000000000..c956f4eb2adf --- /dev/null +++ b/include/linux/section-names.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_SECTION_NAMES_H +#define __LINUX_SECTION_NAMES_H + +#define HEAD_TEXT_SECTION .head.text + +#endif /* !__LINUX_SECTION_NAMES_H */ -- cgit v1.3-8-gc7d7 From c759a6b4e1cae6aff71f58c9c85404ebcd81b6e0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 27 Apr 2009 02:36:20 -0700 Subject: net: Fix LL_MAX_HEADER for CONFIG_TR_MODULE Unless I miss anything this should fix a bug. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e7783f4a755..453be9a674c0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -104,7 +104,7 @@ struct wireless_dev; # else # define LL_MAX_HEADER 96 # endif -#elif defined(CONFIG_TR) +#elif defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) # define LL_MAX_HEADER 48 #else # define LL_MAX_HEADER 32 -- cgit v1.3-8-gc7d7 From 37b607c5ac3b7c92a6a3624bb29f1cdcdcf7044a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 27 Apr 2009 05:45:54 -0700 Subject: net: Fix typo in net_device_ops description. Signed-off-by: Mike Rapoport Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 453be9a674c0..5a96a1a406e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -500,7 +500,7 @@ struct netdev_queue { * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address - * needs to be changed. If not this interface is not defined, the + * needs to be changed. If this interface is not defined, the * mac address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); -- cgit v1.3-8-gc7d7 From 27b1833279995e7c290a40cac4ef36ccea7e9283 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 27 Apr 2009 14:02:27 -0400 Subject: Remove unused support code for refok sections. The old refok sections .text.init.refok .data.init.refok .exit.text.refok have been deprecated since commit 312b1485fb509c9bc32eda28ad29537896658cb8. After the other patches in this patch series nothing is put in these sections, so clean things up by eliminating all the remaining references to them. Signed-off-by: Tim Abbott Acked-by: Sam Ravnborg Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 3 --- include/linux/init.h | 8 -------- scripts/mod/modpost.c | 18 ------------------ 3 files changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eaa06ef6f7d9..89853bcd27a6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -90,7 +90,6 @@ /* .data section */ #define DATA_DATA \ *(.data) \ - *(.data.init.refok) \ *(.ref.data) \ DEV_KEEP(init.data) \ DEV_KEEP(exit.data) \ @@ -289,8 +288,6 @@ *(.text.hot) \ *(.text) \ *(.ref.text) \ - *(.text.init.refok) \ - *(.exit.text.refok) \ DEV_KEEP(init.text) \ DEV_KEEP(exit.text) \ CPU_KEEP(init.text) \ diff --git a/include/linux/init.h b/include/linux/init.h index 20a1334e34e9..0e06c176f185 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -62,14 +62,6 @@ #define __refdata __section(.ref.data) #define __refconst __section(.ref.rodata) -/* backward compatibility note - * A few places hardcode the old section names: - * .text.init.refok - * .data.init.refok - * .exit.text.refok - * They should be converted to use the defines from this file - */ - /* compatibility defines */ #define __init_refok __ref #define __initdata_refok __refdata diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index df6e6286a065..8d46ea7d6715 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -794,15 +794,6 @@ static const char *init_exit_sections[] = /* data section */ static const char *data_sections[] = { DATA_SECTIONS, NULL }; -/* sections that may refer to an init/exit section with no warning */ -static const char *initref_sections[] = -{ - ".text.init.refok*", - ".exit.text.refok*", - ".data.init.refok*", - NULL -}; - /* symbols in .data that may refer to init/exit sections */ static const char *symbol_white_list[] = @@ -915,11 +906,6 @@ static int section_mismatch(const char *fromsec, const char *tosec) /** * Whitelist to allow certain references to pass with no warning. * - * Pattern 0: - * Do not warn if funtion/data are marked with __init_refok/__initdata_refok. - * The pattern is identified by: - * fromsec = .text.init.refok* | .data.init.refok* - * * Pattern 1: * If a module parameter is declared __initdata and permissions=0 * then this is legal despite the warning generated. @@ -958,10 +944,6 @@ static int section_mismatch(const char *fromsec, const char *tosec) static int secref_whitelist(const char *fromsec, const char *fromsym, const char *tosec, const char *tosym) { - /* Check for pattern 0 */ - if (match(fromsec, initref_sections)) - return 0; - /* Check for pattern 1 */ if (match(tosec, init_data_sections) && match(fromsec, data_sections) && -- cgit v1.3-8-gc7d7 From bf368e4e70cd4e0f880923c44e95a4273d725ab4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2009 02:24:21 -0700 Subject: net: Avoid extra wakeups of threads blocked in wait_for_packet() In 2.6.25 we added UDP mem accounting. This unfortunatly added a penalty when a frame is transmitted, since we have at TX completion time to call sock_wfree() to perform necessary memory accounting. This calls sock_def_write_space() and utimately scheduler if any thread is waiting on the socket. Thread(s) waiting for an incoming frame was scheduled, then had to sleep again as event was meaningless. (All threads waiting on a socket are using same sk_sleep anchor) This adds lot of extra wakeups and increases latencies, as noted by Christoph Lameter, and slows down softirq handler. Reference : http://marc.info/?l=linux-netdev&m=124060437012283&w=2 Fortunatly, Davide Libenzi recently added concept of keyed wakeups into kernel, and particularly for sockets (see commit 37e5540b3c9d838eb20f2ca8ea2eb8072271e403 epoll keyed wakeups: make sockets use keyed wakeups) Davide goal was to optimize epoll, but this new wakeup infrastructure can help non epoll users as well, if they care to setup an appropriate handler. This patch introduces new DEFINE_WAIT_FUNC() helper and uses it in wait_for_packet(), so that only relevant event can wakeup a thread blocked in this function. Trace of function calls from bnx2 TX completion bnx2_poll_work() is : __kfree_skb() skb_release_head_state() sock_wfree() sock_def_write_space() __wake_up_sync_key() __wake_up_common() receiver_wake_function() : Stops here since thread is waiting for an INPUT Reported-by: Christoph Lameter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/wait.h | 6 ++++-- net/core/datagram.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5d631c17eaee..bc024632f365 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -440,13 +440,15 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -#define DEFINE_WAIT(name) \ +#define DEFINE_WAIT_FUNC(name, function) \ wait_queue_t name = { \ .private = current, \ - .func = autoremove_wake_function, \ + .func = function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ } +#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) + #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378d..b01a76abe1d2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } +static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + unsigned long bits = (unsigned long)key; + + /* + * Avoid a wakeup if event not interesting for us + */ + if (bits && !(bits & (POLLIN | POLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} /* * Wait for a packet.. */ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); -- cgit v1.3-8-gc7d7 From 6916d97f6e25cc66a32d6e9a16419067d843b14f Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Mon, 27 Apr 2009 11:52:43 -0700 Subject: Input: bcm5974 - add quad-finger tapping The integrated button on the new unibody Macbooks presents a need to report explicit four-finger actions. Evidently, the finger pressing the button is also touching the trackpad, so in order to fully support three-finger actions, the driver must be able to report four-finger actions. This patch adds a new button, BTN_TOOL_QUADTAP, which achieves this. Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 4 +++- include/linux/input.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index bda873393b0d..2ddf05e1d852 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -258,6 +258,7 @@ static void setup_events_to_report(struct input_dev *input_dev, __set_bit(BTN_TOOL_FINGER, input_dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); __set_bit(BTN_LEFT, input_dev->keybit); } @@ -329,7 +330,8 @@ static int report_tp_state(struct bcm5974 *dev, int size) input_report_key(input, BTN_TOUCH, dev->fingers > 0); input_report_key(input, BTN_TOOL_FINGER, dev->fingers == 1); input_report_key(input, BTN_TOOL_DOUBLETAP, dev->fingers == 2); - input_report_key(input, BTN_TOOL_TRIPLETAP, dev->fingers > 2); + input_report_key(input, BTN_TOOL_TRIPLETAP, dev->fingers == 3); + input_report_key(input, BTN_TOOL_QUADTAP, dev->fingers > 3); input_report_abs(input, ABS_PRESSURE, abs_p); input_report_abs(input, ABS_TOOL_WIDTH, abs_w); diff --git a/include/linux/input.h b/include/linux/input.h index 6b28048fc568..32cb825939be 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -445,6 +445,7 @@ struct input_absinfo { #define BTN_STYLUS2 0x14c #define BTN_TOOL_DOUBLETAP 0x14d #define BTN_TOOL_TRIPLETAP 0x14e +#define BTN_TOOL_QUADTAP 0x14f /* Four fingers on trackpad */ #define BTN_WHEEL 0x150 #define BTN_GEAR_DOWN 0x150 -- cgit v1.3-8-gc7d7 From 5e5ee686e3c0f8a3cbe9b75c2690326bf91af10d Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Tue, 28 Apr 2009 07:47:33 -0700 Subject: Input: add detailed multi-touch finger data report protocol In order to utilize the full power of the new multi-touch devices, a way to report detailed finger data to user space is needed. This patch adds a multi-touch (MT) protocol which allows drivers to report details for an arbitrary number of fingers. The driver sends a SYN_MT_REPORT event via the input_mt_sync() function when a complete finger has been reported. In order to stay compatible with existing applications, the data reported in a finger packet must not be recognized as single-touch events. In addition, all finger data must bypass input filtering, since subsequent events of the same type refer to different fingers. A set of ABS_MT events with the desired properties are defined. The events are divided into categories, to allow for partial implementation. The minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked. If the device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size of the approaching finger. Anisotropy and direction may be specified with ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. Devices with more granular information may specify general shapes as blobs, i.e., as a sequence of rectangular shapes grouped together by a ABS_MT_BLOB_ID. Finally, the ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a finger or a pen. Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 13 +++++++++++++ include/linux/input.h | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 8ff92aa13a0a..e54e002665b0 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -33,6 +33,15 @@ MODULE_LICENSE("GPL"); * EV_ABS events which should not be cached are listed here. */ static unsigned int input_abs_bypass_init_data[] __initdata = { + ABS_MT_TOUCH_MAJOR, + ABS_MT_TOUCH_MINOR, + ABS_MT_WIDTH_MAJOR, + ABS_MT_WIDTH_MINOR, + ABS_MT_ORIENTATION, + ABS_MT_POSITION_X, + ABS_MT_POSITION_Y, + ABS_MT_TOOL_TYPE, + ABS_MT_BLOB_ID, 0 }; static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)]; @@ -169,6 +178,10 @@ static void input_handle_event(struct input_dev *dev, disposition = INPUT_PASS_TO_HANDLERS; } break; + case SYN_MT_REPORT: + dev->sync = 0; + disposition = INPUT_PASS_TO_HANDLERS; + break; } break; diff --git a/include/linux/input.h b/include/linux/input.h index 32cb825939be..0e6ff5de3588 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -106,6 +106,7 @@ struct input_absinfo { #define SYN_REPORT 0 #define SYN_CONFIG 1 +#define SYN_MT_REPORT 2 /* * Keys and buttons @@ -645,6 +646,17 @@ struct input_absinfo { #define ABS_TOOL_WIDTH 0x1c #define ABS_VOLUME 0x20 #define ABS_MISC 0x28 + +#define ABS_MT_TOUCH_MAJOR 0x30 /* Major axis of touching ellipse */ +#define ABS_MT_TOUCH_MINOR 0x31 /* Minor axis (omit if circular) */ +#define ABS_MT_WIDTH_MAJOR 0x32 /* Major axis of approaching ellipse */ +#define ABS_MT_WIDTH_MINOR 0x33 /* Minor axis (omit if circular) */ +#define ABS_MT_ORIENTATION 0x34 /* Ellipse orientation */ +#define ABS_MT_POSITION_X 0x35 /* Center X ellipse position */ +#define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ +#define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ +#define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ + #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) @@ -743,6 +755,12 @@ struct input_absinfo { #define BUS_GSC 0x1A #define BUS_ATARI 0x1B +/* + * MT_TOOL types + */ +#define MT_TOOL_FINGER 0 +#define MT_TOOL_PEN 1 + /* * Values describing the status of a force-feedback effect */ @@ -1312,6 +1330,11 @@ static inline void input_sync(struct input_dev *dev) input_event(dev, EV_SYN, SYN_REPORT, 0); } +static inline void input_mt_sync(struct input_dev *dev) +{ + input_event(dev, EV_SYN, SYN_MT_REPORT, 0); +} + void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code); static inline void input_set_abs_params(struct input_dev *dev, int axis, int min, int max, int fuzz, int flat) -- cgit v1.3-8-gc7d7 From 9f6532519feab921856f41b30a2397ee25f4de49 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Apr 2009 21:31:30 -0700 Subject: regulator: fix header file missing kernel-doc Add regulator header file missing kernel-doc: Warning(include/linux/regulator/driver.h:117): No description found for parameter 'set_mode' Signed-off-by: Randy Dunlap cc: Liam Girdwood cc: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4848d8dacd90..225f733e7533 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -50,6 +50,7 @@ enum regulator_status { * @set_current_limit: Configure a limit for a current-limited regulator. * @get_current_limit: Get the configured limit for a current-limited regulator. * + * @set_mode: Set the configured operating mode for the regulator. * @get_mode: Get the configured operating mode for the regulator. * @get_status: Return actual (not as-configured) status of regulator, as a * REGULATOR_STATUS value (or negative errno) -- cgit v1.3-8-gc7d7 From 942e4a2bd680c606af0211e64eb216be2e19bf61 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 28 Apr 2009 22:36:33 -0700 Subject: netfilter: revised locking for x_tables The x_tables are organized with a table structure and a per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU and the counters/rules which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period. This version uses a per-cpu set of spinlocks and counters to allow to table processing to proceed without the cache thrashing of a global reader lock and keeps the same performance for table updates. Signed-off-by: Stephen Hemminger Acked-by: Linus Torvalds Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 73 +++++++++++++++++++-- net/ipv4/netfilter/arp_tables.c | 125 +++++++++++------------------------- net/ipv4/netfilter/ip_tables.c | 126 +++++++++++-------------------------- net/ipv6/netfilter/ip6_tables.c | 123 +++++++++++------------------------- net/netfilter/x_tables.c | 53 ++++++++-------- 5 files changed, 204 insertions(+), 296 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 7b1a652066c0..1b2e43502ef7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -354,9 +354,6 @@ struct xt_table /* What hooks you will enter on */ unsigned int valid_hooks; - /* Lock for the curtain */ - struct mutex lock; - /* Man behind the curtain... */ struct xt_table_info *private; @@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); -extern void xt_table_entry_swap_rcu(struct xt_table_info *old, - struct xt_table_info *new); + +/* + * Per-CPU spinlock associated with per-cpu table entries, and + * with a counter for the "reading" side that allows a recursive + * reader to avoid taking the lock and deadlocking. + * + * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. + * It needs to ensure that the rules are not being changed while the packet + * is being processed. In some cases, the read lock will be acquired + * twice on the same CPU; this is okay because of the count. + * + * "writing" is used when reading counters. + * During replace any readers that are using the old tables have to complete + * before freeing the old table. This is handled by the write locking + * necessary for reading the counters. + */ +struct xt_info_lock { + spinlock_t lock; + unsigned char readers; +}; +DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); + +/* + * Note: we need to ensure that preemption is disabled before acquiring + * the per-cpu-variable, so we do it as a two step process rather than + * using "spin_lock_bh()". + * + * We _also_ need to disable bottom half processing before updating our + * nesting count, to make sure that the only kind of re-entrancy is this + * code being called by itself: since the count+lock is not an atomic + * operation, we can allow no races. + * + * _Only_ that special combination of being per-cpu and never getting + * re-entered asynchronously means that the count is safe. + */ +static inline void xt_info_rdlock_bh(void) +{ + struct xt_info_lock *lock; + + local_bh_disable(); + lock = &__get_cpu_var(xt_info_locks); + if (!lock->readers++) + spin_lock(&lock->lock); +} + +static inline void xt_info_rdunlock_bh(void) +{ + struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + + if (!--lock->readers) + spin_unlock(&lock->lock); + local_bh_enable(); +} + +/* + * The "writer" side needs to get exclusive access to the lock, + * regardless of readers. This must be called with bottom half + * processing (and thus also preemption) disabled. + */ +static inline void xt_info_wrlock(unsigned int cpu) +{ + spin_lock(&per_cpu(xt_info_locks, cpu).lock); +} + +static inline void xt_info_wrunlock(unsigned int cpu) +{ + spin_unlock(&per_cpu(xt_info_locks, cpu).lock); +} /* * This helper is performance critical and must be inlined diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5ba533d234db..831fe1879dc0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); if (hotdrop) return NF_DROP; @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; ARPT_ENTRY_ITERATE(t->entries[curcpu], @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct arpt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); - + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 810c0b62c7d4..2ec8d7290c40 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } - -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } - -static inline int -zero_entry_counter(struct ipt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 800ae8542471..219e165aea10 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IP6T_ENTRY_ITERATE(t->entries[curcpu], @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct ip6t_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + xt_info_wrlock(curcpu); + loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); + unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 509a95621f9f..150e5cf62f85 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, - struct xt_table_info *newinfo) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - void *p = oldinfo->entries[cpu]; - rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); - newinfo->entries[cpu] = p; - } - -} -EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); - /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif +DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); +EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); + + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { - struct xt_table_info *oldinfo, *private; + struct xt_table_info *private; /* Do the substitution. */ - mutex_lock(&table->lock); + local_bh_disable(); private = table->private; + /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - mutex_unlock(&table->lock); + local_bh_enable(); *error = -EAGAIN; return NULL; } - oldinfo = private; - rcu_assign_pointer(table->private, newinfo); - newinfo->initial_entries = oldinfo->initial_entries; - mutex_unlock(&table->lock); - synchronize_net(); - return oldinfo; + table->private = newinfo; + newinfo->initial_entries = private->initial_entries; + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries. This is okay, because + * resynchronization happens because of the locking done + * during the get_counters() routine. + */ + local_bh_enable(); + + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. */ table->private = bootstrap; - mutex_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { static int __init xt_init(void) { - int i, rv; + unsigned int i; + int rv; + + for_each_possible_cpu(i) { + struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); + spin_lock_init(&lock->lock); + lock->readers = 0; + } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) -- cgit v1.3-8-gc7d7 From d37dc42ab6f040b8f0f2962ab219c5b2accf748d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 06:45:08 -0400 Subject: nls: add a nls_nullsize inline It's possible for character sets to require a multi-byte null string terminator. Add a helper function that determines the size of the null terminator at runtime. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- include/linux/nls.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nls.h b/include/linux/nls.h index 6a882208301a..52b1a76c1b43 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h @@ -58,6 +58,25 @@ static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1, return 0; } +/* + * nls_nullsize - return length of null character for codepage + * @codepage - codepage for which to return length of NULL terminator + * + * Since we can't guarantee that the null terminator will be a particular + * length, we have to check against the codepage. If there's a problem + * determining it, assume a single-byte NULL terminator. + */ +static inline int +nls_nullsize(const struct nls_table *codepage) +{ + int charlen; + char tmp[NLS_MAX_CHARSET_SIZE]; + + charlen = codepage->uni2char(0, tmp, NLS_MAX_CHARSET_SIZE); + + return charlen > 0 ? charlen : 1; +} + #define MODULE_ALIAS_NLS(name) MODULE_ALIAS("nls_" __stringify(name)) #endif /* _LINUX_NLS_H */ -- cgit v1.3-8-gc7d7 From 96c16743973e8c1a7b9c655d10b7973408d6d1dd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 30 Apr 2009 18:24:34 +0200 Subject: ide-cd: fix REQ_QUIET tests in cdrom_decode_status Original patch (dfa4411cc3a690011cab90e9a536938795366cf9) was buggy. This is a more proper fix which introduces blk_rq_quiet() macro alleviating the need for dumb, too short caching variables. Thanks to Helge Deller and Bart for debugging this. Signed-off-by: Borislav Petkov Cc: Jens Axboe Cc: Sergei Shtylyov Reported-and-tested-by: Helge Deller Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 9 ++++----- include/linux/blkdev.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 3d4e09969763..925eb9e245d1 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -312,7 +312,6 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) ide_hwif_t *hwif = drive->hwif; struct request *rq = hwif->rq; int err, sense_key, do_end_request = 0; - u8 quiet = rq->cmd_flags & REQ_QUIET; /* get the IDE error register */ err = ide_read_error(drive); @@ -347,7 +346,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) } else { cdrom_saw_media_change(drive); - if (blk_fs_request(rq) && !quiet) + if (blk_fs_request(rq) && !blk_rq_quiet(rq)) printk(KERN_ERR PFX "%s: tray open\n", drive->name); } @@ -382,7 +381,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in retrying after an illegal request or data * protect error. */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "command error", stat); do_end_request = 1; break; @@ -391,14 +390,14 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in re-trying a zillion times on a bad sector. * If we got here the error is not correctable. */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "media error " "(bad sector)", stat); do_end_request = 1; break; case BLANK_CHECK: /* disk appears blank? */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "media error (blank)", stat); do_end_request = 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba54c834a590..6f841fb1be30 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -598,6 +598,7 @@ enum { blk_failfast_transport(rq) || \ blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) +#define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.3-8-gc7d7 From 0f3d042ed2f934f149ccb78300454beaf0c1134b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 May 2009 09:10:46 -0700 Subject: netfilter: use likely() in xt_info_rdlock_bh() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1b2e43502ef7..c9efe039dc57 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -472,7 +472,7 @@ static inline void xt_info_rdlock_bh(void) local_bh_disable(); lock = &__get_cpu_var(xt_info_locks); - if (!lock->readers++) + if (likely(!lock->readers++)) spin_lock(&lock->lock); } @@ -480,7 +480,7 @@ static inline void xt_info_rdunlock_bh(void) { struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); - if (!--lock->readers) + if (likely(!--lock->readers)) spin_unlock(&lock->lock); local_bh_enable(); } -- cgit v1.3-8-gc7d7 From c047fcd245975f40312ed57bf43e7d4abd188e6b Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 1 May 2009 15:34:02 -0700 Subject: virtio: add missing include to virtio_net.h virtio_net.h uses the macro ETH_ALEN which is defined in linux/if_ether.h. Discovered when hacking on virtio-over-pci patches. Signed-off-by: Grant Likely Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 242348bb3766..cec79adbe3ea 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -4,6 +4,7 @@ * compatible drivers/servers. */ #include #include +#include /* The ID for virtio_net */ #define VIRTIO_ID_NET 1 -- cgit v1.3-8-gc7d7 From ae3abae64f177586be55b04a7fb7047a34b21a3e Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 30 Apr 2009 15:08:19 -0700 Subject: memcg: fix mem_cgroup_shrink_usage() Current mem_cgroup_shrink_usage() has two problems. 1. It doesn't call mem_cgroup_out_of_memory and doesn't update last_oom_jiffies, so pagefault_out_of_memory invokes global OOM. 2. Considering hierarchy, shrinking has to be done from the mem_over_limit, not from the memcg which the page would be charged to. mem_cgroup_try_charge_swapin() does all of these things properly, so we use it and call cancel_charge_swapin when it succeeded. The name of "shrink_usage" is not appropriate for this behavior, so we change it too. Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Paul Menage Cc: Dhaval Giani Cc: Daisuke Nishimura Cc: YAMAMOTO Takashi Cc: KOSAKI Motohiro Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- mm/memcontrol.c | 33 ++++++++++++--------------------- mm/shmem.c | 8 ++++++-- 3 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a9e3b76aa884..25b9ca93d232 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -56,7 +56,7 @@ extern void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shrink_usage(struct page *page, +extern int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, @@ -155,7 +155,7 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shrink_usage(struct page *page, +static inline int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 575203ae2109..01c2d8f14685 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1617,37 +1617,28 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, } /* - * A call to try to shrink memory usage under specified resource controller. - * This is typically used for page reclaiming for shmem for reducing side - * effect of page allocation from shmem, which is used by some mem_cgroup. + * A call to try to shrink memory usage on charge failure at shmem's swapin. + * Calling hierarchical_reclaim is not enough because we should update + * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. + * Moreover considering hierarchy, we should reclaim from the mem_over_limit, + * not from the memcg which this page would be charged to. + * try_charge_swapin does all of these works properly. */ -int mem_cgroup_shrink_usage(struct page *page, +int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { struct mem_cgroup *mem = NULL; - int progress = 0; - int retry = MEM_CGROUP_RECLAIM_RETRIES; + int ret; if (mem_cgroup_disabled()) return 0; - if (page) - mem = try_get_mem_cgroup_from_swapcache(page); - if (!mem && mm) - mem = try_get_mem_cgroup_from_mm(mm); - if (unlikely(!mem)) - return 0; - do { - progress = mem_cgroup_hierarchical_reclaim(mem, - gfp_mask, true, false); - progress += mem_cgroup_check_under_limit(mem); - } while (!progress && --retry); + ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); + if (!ret) + mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - css_put(&mem->css); - if (!retry) - return -ENOMEM; - return 0; + return ret; } static DEFINE_MUTEX(set_limit_mutex); diff --git a/mm/shmem.c b/mm/shmem.c index f9cb20ebb990..b25f95ce3db7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1340,8 +1340,12 @@ repeat: shmem_swp_unmap(entry); spin_unlock(&info->lock); if (error == -ENOMEM) { - /* allow reclaim from this memory cgroup */ - error = mem_cgroup_shrink_usage(swappage, + /* + * reclaim from proper memory cgroup and + * call memcg's OOM if needed. + */ + error = mem_cgroup_shmem_charge_fallback( + swappage, current->mm, gfp); if (error) { -- cgit v1.3-8-gc7d7 From 74641f584da8eccf30becfbb5507ab457187db22 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 30 Apr 2009 15:08:49 -0700 Subject: alpha: binfmt_aout fix This fixes the problem introduced by commit 3bfacef412 (get rid of special-casing the /sbin/loader on alpha): osf/1 ecoff binary segfaults when binfmt_aout built as module. That happens because aout binary handler gets on the top of the binfmt list due to late registration, and kernel attempts to execute the binary without preparatory work that must be done by binfmt_loader. Fixed by changing the registration order of the default binfmt handlers using list_add_tail() and introducing insert_binfmt() function which places new handler on the top of the binfmt list. This might be generally useful for installing arch-specific frontends for default handlers or just for overriding them. Signed-off-by: Ivan Kokshaysky Cc: Al Viro Cc: Richard Henderson Signed-off-by: Linus Torvalds --- arch/alpha/kernel/Makefile | 6 +++++- arch/alpha/kernel/binfmt_loader.c | 2 +- fs/exec.c | 7 ++++--- include/linux/binfmts.h | 14 +++++++++++++- 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index a427538252f8..7739a62440a7 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o + alpha_ksyms.o systbls.o err_common.o io.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o @@ -43,6 +43,10 @@ else # Misc support obj-$(CONFIG_ALPHA_SRM) += srmcons.o +ifdef CONFIG_BINFMT_AOUT +obj-y += binfmt_loader.o +endif + # Core logic support obj-$(CONFIG_ALPHA_APECS) += core_apecs.o obj-$(CONFIG_ALPHA_CIA) += core_cia.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c index 4a0af906b00a..3fcfad410130 100644 --- a/arch/alpha/kernel/binfmt_loader.c +++ b/arch/alpha/kernel/binfmt_loader.c @@ -46,6 +46,6 @@ static struct linux_binfmt loader_format = { static int __init init_loader_binfmt(void) { - return register_binfmt(&loader_format); + return insert_binfmt(&loader_format); } arch_initcall(init_loader_binfmt); diff --git a/fs/exec.c b/fs/exec.c index a3a8ce83940f..639177b0eeac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -69,17 +69,18 @@ int suid_dumpable = 0; static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int register_binfmt(struct linux_binfmt * fmt) +int __register_binfmt(struct linux_binfmt * fmt, int insert) { if (!fmt) return -EINVAL; write_lock(&binfmt_lock); - list_add(&fmt->lh, &formats); + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); return 0; } -EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(__register_binfmt); void unregister_binfmt(struct linux_binfmt * fmt) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 6638b8148de7..61ee18c1bdb4 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -82,7 +82,19 @@ struct linux_binfmt { int hasvdso; }; -extern int register_binfmt(struct linux_binfmt *); +extern int __register_binfmt(struct linux_binfmt *fmt, int insert); + +/* Registration of default binfmt handlers */ +static inline int register_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 0); +} +/* Same as above, but adds a new binfmt at the top of the list */ +static inline int insert_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 1); +} + extern void unregister_binfmt(struct linux_binfmt *); extern int prepare_binprm(struct linux_binprm *); -- cgit v1.3-8-gc7d7 From 0763ed2355198cdef2f6a2098e9d52eb1fe4365d Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 30 Apr 2009 15:08:50 -0700 Subject: of: make of_(un)register_platform_driver common code Some drivers using of_register_platform_driver() wrapper break on sparc because the wrapper isn't in the header file. This patch moves it from Microblaze and PowerPC implementations and makes it common code. Fixes this sparc64 allmodconfig build error (at least): drivers/leds/leds-gpio.c: In function `gpio_led_init': drivers/leds/leds-gpio.c:295: error: implicit declaration of function `of_register_platform_driver' drivers/leds/leds-gpio.c: In function `gpio_led_exit': drivers/leds/leds-gpio.c:311: error: implicit declaration of function `of_unregister_platform_driver' Signed-off-by: Grant Likely Acked-by: David S. Miller Cc: Michal Simek Cc: Benjamin Herrenschmidt Cc: Stephen Rothwell Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/of_platform.h | 10 ---------- arch/powerpc/include/asm/of_platform.h | 10 ---------- include/linux/of_platform.h | 10 ++++++++++ 3 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/include/asm/of_platform.h b/arch/microblaze/include/asm/of_platform.h index 187c0eedaece..37491276c6ca 100644 --- a/arch/microblaze/include/asm/of_platform.h +++ b/arch/microblaze/include/asm/of_platform.h @@ -36,16 +36,6 @@ static const struct of_device_id of_default_bus_ids[] = { {}, }; -/* Platform drivers register/unregister */ -static inline int of_register_platform_driver(struct of_platform_driver *drv) -{ - return of_register_driver(drv, &of_platform_bus_type); -} -static inline void of_unregister_platform_driver(struct of_platform_driver *drv) -{ - of_unregister_driver(drv); -} - /* Platform devices and busses creation */ extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id, diff --git a/arch/powerpc/include/asm/of_platform.h b/arch/powerpc/include/asm/of_platform.h index 53b46507ffde..d4aaa3489440 100644 --- a/arch/powerpc/include/asm/of_platform.h +++ b/arch/powerpc/include/asm/of_platform.h @@ -11,16 +11,6 @@ * */ -/* Platform drivers register/unregister */ -static inline int of_register_platform_driver(struct of_platform_driver *drv) -{ - return of_register_driver(drv, &of_platform_bus_type); -} -static inline void of_unregister_platform_driver(struct of_platform_driver *drv) -{ - of_unregister_driver(drv); -} - /* Platform devices and busses creation */ extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id, diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 3d327b67d7e2..908406651330 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -51,6 +51,16 @@ extern int of_register_driver(struct of_platform_driver *drv, struct bus_type *bus); extern void of_unregister_driver(struct of_platform_driver *drv); +/* Platform drivers register/unregister */ +static inline int of_register_platform_driver(struct of_platform_driver *drv) +{ + return of_register_driver(drv, &of_platform_bus_type); +} +static inline void of_unregister_platform_driver(struct of_platform_driver *drv) +{ + of_unregister_driver(drv); +} + #include extern struct of_device *of_find_device_by_node(struct device_node *np); -- cgit v1.3-8-gc7d7 From 00a62ce91e554198ef28234c91c36f850f5a3bc9 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 30 Apr 2009 15:08:51 -0700 Subject: mm: fix Committed_AS underflow on large NR_CPUS environment The Committed_AS field can underflow in certain situations: > # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c > 1 Committed_AS: 18446744073709323392 kB > 11 Committed_AS: 18446744073709455488 kB > 6 Committed_AS: 35136 kB > 5 Committed_AS: 18446744073709454400 kB > 7 Committed_AS: 35904 kB > 3 Committed_AS: 18446744073709453248 kB > 2 Committed_AS: 34752 kB > 9 Committed_AS: 18446744073709453248 kB > 8 Committed_AS: 34752 kB > 3 Committed_AS: 18446744073709320960 kB > 7 Committed_AS: 18446744073709454080 kB > 3 Committed_AS: 18446744073709320960 kB > 5 Committed_AS: 18446744073709454080 kB > 6 Committed_AS: 18446744073709320960 kB Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does not check for underflow. But NR_CPUS proportional isn't good calculation. In general, possibility of lock contention is proportional to the number of online cpus, not theorical maximum cpus (NR_CPUS). The current kernel has generic percpu-counter stuff. using it is right way. it makes code simplify and percpu_counter_read_positive() don't make underflow issue. Reported-by: Dave Hansen Signed-off-by: KOSAKI Motohiro Cc: Eric B Munson Cc: Mel Gorman Cc: Christoph Lameter Cc: [All kernel versions] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- include/linux/mman.h | 9 +++------ mm/mmap.c | 12 ++++++------ mm/nommu.c | 13 +++++++------ mm/swap.c | 46 ---------------------------------------------- 5 files changed, 17 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 74ea974f5ca6..c6b0302af4c4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_long_read(&vm_committed_space); + committed = percpu_counter_read_positive(&vm_committed_as); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; diff --git a/include/linux/mman.h b/include/linux/mman.h index 30d1073bac3b..9872d6ca58ae 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -12,21 +12,18 @@ #ifdef __KERNEL__ #include +#include #include extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; -extern atomic_long_t vm_committed_space; +extern struct percpu_counter vm_committed_as; -#ifdef CONFIG_SMP -extern void vm_acct_memory(long pages); -#else static inline void vm_acct_memory(long pages) { - atomic_long_add(pages, &vm_committed_space); + percpu_counter_add(&vm_committed_as, pages); } -#endif static inline void vm_unacct_memory(long pages) { diff --git a/mm/mmap.c b/mm/mmap.c index 3303d1ba8e87..6b7b1a95944b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +struct percpu_counter vm_committed_as; /* * Check that a process has enough memory to allocate a new virtual @@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) if (mm) allowed -= mm->total_vm / 32; - /* - * cast `allowed' as a signed long because vm_committed_space - * sometimes has a negative value - */ - if (atomic_long_read(&vm_committed_space) < (long)allowed) + if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; error: vm_unacct_memory(pages); @@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm) */ void __init mmap_init(void) { + int ret; + + ret = percpu_counter_init(&vm_committed_as, 0); + VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index 72eda4aee2cb..809998aa7b50 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -62,7 +62,7 @@ void *high_memory; struct page *mem_map; unsigned long max_mapnr; unsigned long num_physpages; -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +struct percpu_counter vm_committed_as; int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; @@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ void __init mmap_init(void) { + int ret; + + ret = percpu_counter_init(&vm_committed_as, 0); + VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } @@ -1847,12 +1851,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) if (mm) allowed -= mm->total_vm / 32; - /* - * cast `allowed' as a signed long because vm_committed_space - * sometimes has a negative value - */ - if (atomic_long_read(&vm_committed_space) < (long)allowed) + if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; + error: vm_unacct_memory(pages); diff --git a/mm/swap.c b/mm/swap.c index bede23ce64ea..cb29ae5d33ab 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, EXPORT_SYMBOL(pagevec_lookup_tag); -#ifdef CONFIG_SMP -/* - * We tolerate a little inaccuracy to avoid ping-ponging the counter between - * CPUs - */ -#define ACCT_THRESHOLD max(16, NR_CPUS * 2) - -static DEFINE_PER_CPU(long, committed_space); - -void vm_acct_memory(long pages) -{ - long *local; - - preempt_disable(); - local = &__get_cpu_var(committed_space); - *local += pages; - if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { - atomic_long_add(*local, &vm_committed_space); - *local = 0; - } - preempt_enable(); -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* Drop the CPU's cached committed space back into the central pool. */ -static int cpu_swap_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - long *committed; - - committed = &per_cpu(committed_space, (long)hcpu); - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - atomic_long_add(*committed, &vm_committed_space); - *committed = 0; - drain_cpu_pagevecs((long)hcpu); - } - return NOTIFY_OK; -} -#endif /* CONFIG_HOTPLUG_CPU */ -#endif /* CONFIG_SMP */ - /* * Perform any setup for the swap system */ @@ -554,7 +511,4 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ -#ifdef CONFIG_HOTPLUG_CPU - hotcpu_notifier(cpu_swap_callback, 0); -#endif } -- cgit v1.3-8-gc7d7 From f75e6745aa3084124ae1434fd7629853bdaf6798 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 Apr 2009 17:18:20 -0400 Subject: SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect See http://bugzilla.kernel.org/show_bug.cgi?id=13034 If the port gets into a TIME_WAIT state, then we cannot reconnect without binding to a new port. Tested-by: Petr Vandrovec Tested-by: Jean Delvare Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 6 ++---- net/sunrpc/xprtsock.c | 26 +++++++++++++++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1758d9f5b5c3..08afe43118f4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -261,6 +261,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) +#define XPRT_CONNECTION_CLOSE (8) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a0bfe53f1621..06ca058572f2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d40ff50887aa..e18596146013 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport) * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. */ static void xs_close(struct rpc_xprt *xprt) { @@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt_clear_connecting(xprt); return; } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); out_eagain: status = -EAGAIN; out: @@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; -- cgit v1.3-8-gc7d7 From 9f722c0978b04acba209f8ca1896ad05814bc3a3 Mon Sep 17 00:00:00 2001 From: Omar Laazimani Date: Mon, 4 May 2009 12:01:43 -0700 Subject: usbnet: CDC EEM support (v5) This introduces a CDC Ethernet Emulation Model (EEM) host side driver to support USB EEM devices. EEM is different from the Ethernet Control Model (ECM) currently supported by the "CDC Ethernet" driver. One key difference is that it doesn't require of USB interface alternate settings to manage interface state; some maldesigned hardware can't handle that part of USB. It also avoids a separate USB interface for control and status updates. [ dbrownell@users.sourceforge.net: fix skb leaks, add rx packet checks, improve fault handling, EEM conformance updates, cleanup ] Signed-off-by: Omar Laazimani Signed-off-by: David Brownell Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 14 ++ drivers/net/usb/Makefile | 1 + drivers/net/usb/cdc_eem.c | 381 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/cdc.h | 3 + 4 files changed, 399 insertions(+) create mode 100644 drivers/net/usb/cdc_eem.c (limited to 'include/linux') diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 8ee21030e9ac..dfc6cf765fbd 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -180,6 +180,20 @@ config USB_NET_CDCETHER IEEE 802 "local assignment" bit is set in the address, a "usbX" name is used instead. +config USB_NET_CDC_EEM + tristate "CDC EEM support" + depends on USB_USBNET && EXPERIMENTAL + help + This option supports devices conforming to the Communication Device + Class (CDC) Ethernet Emulation Model, a specification that's easy to + implement in device firmware. The CDC EEM specifications are available + from . + + This driver creates an interface named "ethX", where X depends on + what other networking devices you have in use. However, if the + IEEE 802 "local assignment" bit is set in the address, a "usbX" + name is used instead. + config USB_NET_DM9601 tristate "Davicom DM9601 based USB 1.1 10/100 ethernet devices" depends on USB_USBNET diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index 88a87eeb376a..c8aef62cf2b7 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_USB_RTL8150) += rtl8150.o obj-$(CONFIG_USB_HSO) += hso.o obj-$(CONFIG_USB_NET_AX8817X) += asix.o obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o +obj-$(CONFIG_USB_NET_CDC_EEM) += cdc_eem.o obj-$(CONFIG_USB_NET_DM9601) += dm9601.o obj-$(CONFIG_USB_NET_SMSC95XX) += smsc95xx.o obj-$(CONFIG_USB_NET_GL620A) += gl620a.o diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c new file mode 100644 index 000000000000..80e01778dd3b --- /dev/null +++ b/drivers/net/usb/cdc_eem.c @@ -0,0 +1,381 @@ +/* + * USB CDC EEM network interface driver + * Copyright (C) 2009 Oberthur Technologies + * by Omar Laazimani, Olivier Condemine + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * This driver is an implementation of the CDC "Ethernet Emulation + * Model" (EEM) specification, which encapsulates Ethernet frames + * for transport over USB using a simpler USB device model than the + * previous CDC "Ethernet Control Model" (ECM, or "CDC Ethernet"). + * + * For details, see www.usb.org/developers/devclass_docs/CDC_EEM10.pdf + * + * This version has been tested with GIGAntIC WuaoW SIM Smart Card on 2.6.24, + * 2.6.27 and 2.6.30rc2 kernel. + * It has also been validated on Openmoko Om 2008.12 (based on 2.6.24 kernel). + * build on 23-April-2009 + */ + +#define EEM_HEAD 2 /* 2 byte header */ + +/*-------------------------------------------------------------------------*/ + +static void eem_linkcmd_complete(struct urb *urb) +{ + dev_kfree_skb(urb->context); + usb_free_urb(urb); +} + +static void eem_linkcmd(struct usbnet *dev, struct sk_buff *skb) +{ + struct urb *urb; + int status; + + urb = usb_alloc_urb(0, GFP_ATOMIC); + if (!urb) + goto fail; + + usb_fill_bulk_urb(urb, dev->udev, dev->out, + skb->data, skb->len, eem_linkcmd_complete, skb); + + status = usb_submit_urb(urb, GFP_ATOMIC); + if (status) { + usb_free_urb(urb); +fail: + dev_kfree_skb(skb); + devwarn(dev, "link cmd failure\n"); + return; + } +} + +static int eem_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int status = 0; + + status = usbnet_get_endpoints(dev, intf); + if (status < 0) { + usb_set_intfdata(intf, NULL); + usb_driver_release_interface(driver_of(intf), intf); + return status; + } + + /* no jumbogram (16K) support for now */ + + dev->net->hard_header_len += EEM_HEAD + ETH_FCS_LEN; + + return 0; +} + +/* + * EEM permits packing multiple Ethernet frames into USB transfers + * (a "bundle"), but for TX we don't try to do that. + */ +static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb, + gfp_t flags) +{ + struct sk_buff *skb2 = NULL; + u16 len = skb->len; + u32 crc = 0; + int padlen = 0; + + /* When ((len + EEM_HEAD + ETH_FCS_LEN) % dev->maxpacket) is + * zero, stick two bytes of zero length EEM packet on the end. + * Else the framework would add invalid single byte padding, + * since it can't know whether ZLPs will be handled right by + * all the relevant hardware and software. + */ + if (!((len + EEM_HEAD + ETH_FCS_LEN) % dev->maxpacket)) + padlen += 2; + + if (!skb_cloned(skb)) { + int headroom = skb_headroom(skb); + int tailroom = skb_tailroom(skb); + + if ((tailroom >= ETH_FCS_LEN + padlen) + && (headroom >= EEM_HEAD)) + goto done; + + if ((headroom + tailroom) + > (EEM_HEAD + ETH_FCS_LEN + padlen)) { + skb->data = memmove(skb->head + + EEM_HEAD, + skb->data, + skb->len); + skb_set_tail_pointer(skb, len); + goto done; + } + } + + skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags); + if (!skb2) + return NULL; + + dev_kfree_skb_any(skb); + skb = skb2; + +done: + /* we don't use the "no Ethernet CRC" option */ + crc = crc32_le(~0, skb->data, skb->len); + crc = ~crc; + + put_unaligned_le32(crc, skb_put(skb, 4)); + + /* EEM packet header format: + * b0..13: length of ethernet frame + * b14: bmCRC (1 == valid Ethernet CRC) + * b15: bmType (0 == data) + */ + len = skb->len; + put_unaligned_le16(BIT(14) | len, skb_push(skb, 2)); + + /* Bundle a zero length EEM packet if needed */ + if (padlen) + put_unaligned_le16(0, skb_put(skb, 2)); + + return skb; +} + +static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +{ + /* + * Our task here is to strip off framing, leaving skb with one + * data frame for the usbnet framework code to process. But we + * may have received multiple EEM payloads, or command payloads. + * So we must process _everything_ as if it's a header, except + * maybe the last data payload + * + * REVISIT the framework needs updating so that when we consume + * all payloads (the last or only message was a command, or a + * zero length EEM packet) that is not accounted as an rx_error. + */ + do { + struct sk_buff *skb2 = NULL; + u16 header; + u16 len = 0; + + /* incomplete EEM header? */ + if (skb->len < EEM_HEAD) + return 0; + + /* + * EEM packet header format: + * b0..14: EEM type dependant (Data or Command) + * b15: bmType + */ + header = get_unaligned_le16(skb->data); + skb_pull(skb, EEM_HEAD); + + /* + * The bmType bit helps to denote when EEM + * packet is data or command : + * bmType = 0 : EEM data payload + * bmType = 1 : EEM (link) command + */ + if (header & BIT(15)) { + u16 bmEEMCmd; + + /* + * EEM (link) command packet: + * b0..10: bmEEMCmdParam + * b11..13: bmEEMCmd + * b14: bmReserved (must be 0) + * b15: 1 (EEM command) + */ + if (header & BIT(14)) { + devdbg(dev, "reserved command %04x\n", header); + continue; + } + + bmEEMCmd = (header >> 11) & 0x7; + switch (bmEEMCmd) { + + /* Responding to echo requests is mandatory. */ + case 0: /* Echo command */ + len = header & 0x7FF; + + /* bogus command? */ + if (skb->len < len) + return 0; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + goto next; + skb_trim(skb2, len); + put_unaligned_le16(BIT(15) | (1 << 11) | len, + skb_push(skb2, 2)); + eem_linkcmd(dev, skb2); + break; + + /* + * Host may choose to ignore hints. + * - suspend: peripheral ready to suspend + * - response: suggest N millisec polling + * - response complete: suggest N sec polling + */ + case 2: /* Suspend hint */ + case 3: /* Response hint */ + case 4: /* Response complete hint */ + continue; + + /* + * Hosts should never receive host-to-peripheral + * or reserved command codes; or responses to an + * echo command we didn't send. + */ + case 1: /* Echo response */ + case 5: /* Tickle */ + default: /* reserved */ + devwarn(dev, "unexpected link command %d\n", + bmEEMCmd); + continue; + } + + } else { + u32 crc, crc2; + int is_last; + + /* zero length EEM packet? */ + if (header == 0) + continue; + + /* + * EEM data packet header : + * b0..13: length of ethernet frame + * b14: bmCRC + * b15: 0 (EEM data) + */ + len = header & 0x3FFF; + + /* bogus EEM payload? */ + if (skb->len < len) + return 0; + + /* bogus ethernet frame? */ + if (len < (ETH_HLEN + ETH_FCS_LEN)) + goto next; + + /* + * Treat the last payload differently: framework + * code expects our "fixup" to have stripped off + * headers, so "skb" is a data packet (or error). + * Else if it's not the last payload, keep "skb" + * for further processing. + */ + is_last = (len == skb->len); + if (is_last) + skb2 = skb; + else { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + return 0; + } + + crc = get_unaligned_le32(skb2->data + + len - ETH_FCS_LEN); + skb_trim(skb2, len - ETH_FCS_LEN); + + /* + * The bmCRC helps to denote when the CRC field in + * the Ethernet frame contains a calculated CRC: + * bmCRC = 1 : CRC is calculated + * bmCRC = 0 : CRC = 0xDEADBEEF + */ + if (header & BIT(14)) + crc2 = ~crc32_le(~0, skb2->data, len); + else + crc2 = 0xdeadbeef; + + if (is_last) + return crc == crc2; + + if (unlikely(crc != crc2)) { + dev->stats.rx_errors++; + dev_kfree_skb_any(skb2); + } else + usbnet_skb_return(dev, skb2); + } + +next: + skb_pull(skb, len); + } while (skb->len); + + return 1; +} + +static const struct driver_info eem_info = { + .description = "CDC EEM Device", + .flags = FLAG_ETHER, + .bind = eem_bind, + .rx_fixup = eem_rx_fixup, + .tx_fixup = eem_tx_fixup, +}; + +/*-------------------------------------------------------------------------*/ + +static const struct usb_device_id products[] = { +{ + USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_EEM, + USB_CDC_PROTO_EEM), + .driver_info = (unsigned long) &eem_info, +}, +{ + /* EMPTY == end of list */ +}, +}; +MODULE_DEVICE_TABLE(usb, products); + +static struct usb_driver eem_driver = { + .name = "cdc_eem", + .id_table = products, + .probe = usbnet_probe, + .disconnect = usbnet_disconnect, + .suspend = usbnet_suspend, + .resume = usbnet_resume, +}; + + +static int __init eem_init(void) +{ + return usb_register(&eem_driver); +} +module_init(eem_init); + +static void __exit eem_exit(void) +{ + usb_deregister(&eem_driver); +} +module_exit(eem_exit); + +MODULE_AUTHOR("Omar Laazimani "); +MODULE_DESCRIPTION("USB CDC EEM"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 3c86ed25a04c..c24124a42ce5 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -17,6 +17,7 @@ #define USB_CDC_SUBCLASS_DMM 0x09 #define USB_CDC_SUBCLASS_MDLM 0x0a #define USB_CDC_SUBCLASS_OBEX 0x0b +#define USB_CDC_SUBCLASS_EEM 0x0c #define USB_CDC_PROTO_NONE 0 @@ -28,6 +29,8 @@ #define USB_CDC_ACM_PROTO_AT_CDMA 6 #define USB_CDC_ACM_PROTO_VENDOR 0xff +#define USB_CDC_PROTO_EEM 7 + /*-------------------------------------------------------------------------*/ /* -- cgit v1.3-8-gc7d7